In [25]:
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from evidently.report import Report
from evidently.metric_preset import ClassificationPreset
import os
import cv2
import numpy as np
import mlflow
from scipy.stats import wasserstein_distance



In [27]:

# Set MLflow experiment name
mlflow.set_experiment("Concept Drift Detection")

<Experiment: artifact_location='file:///Users/ximenamoure/Desktop/concept_drift/mlruns/457136894682190825', creation_time=1728658822037, experiment_id='457136894682190825', last_update_time=1728658822037, lifecycle_stage='active', name='Concept Drift Detection', tags={}>

In [28]:
model = load_model('/Users/ximenamoure/Desktop/drift_last/models/mobilenet_v2_occupancy_tf216.keras')

In [29]:
ref_path = "/Users/ximenamoure/Desktop/drift_last/reference_dataset"

In [30]:
new_path = "/Users/ximenamoure/Chess-Piece-Classification-Dataset/images/processed/occupancy/split0"

In [31]:
#split_name = os.path.basename(new_path)

In [32]:
categories = ["bb", "bk", "bn", "bp", "bq", "br", "wb", "wk", "wn", "wp", "wq", "wr", "empty"]

In [33]:
label_mapping = {category: 1 for category in categories if category != "empty"}
label_mapping["empty"] = 0

In [34]:
new_label_mapping = {
    "wb": 1, "wk": 1, "wn": 1, "wp": 1, "wq": 1, "wr": 1,  # White pieces marked as "occupied" (1)
    "bb": 0, "bk": 0, "bn": 0, "bp": 0, "bq": 0, "br": 0,  # Black pieces marked as "empty" (0)
    "empty": 0  # Keep empty squares as "empty" (0)
}

In [35]:
def load_images_and_labels_with_new_interpretation(base_path, categories, label_mapping):
    images = []
    labels = []
    image_paths = []

    for category in categories:
        category_path = os.path.join(base_path, category)
        for img_name in os.listdir(category_path):
            img_path = os.path.join(category_path, img_name)
            img = cv2.imread(img_path)
            img = cv2.resize(img, (128, 128))
            images.append(img)
            labels.append(label_mapping[category])
            image_paths.append(img_path)

    return np.array(images), np.array(labels), np.array(image_paths)


In [36]:
# Function to load images and their labels
def load_images_and_labels(base_path, categories):
    images = []
    labels = []
    image_paths = []

    for category in categories:
        category_path = os.path.join(base_path, category)
        for img_name in os.listdir(category_path):
            img_path = os.path.join(category_path, img_name)
            img = cv2.imread(img_path)
            img = cv2.resize(img, (128, 128))
            images.append(img)
            labels.append(label_mapping[category])
            image_paths.append(img_path)


    return np.array(images), np.array(labels), np.array(image_paths)

In [37]:
def get_model_predictions(model, images, batch_size=32, threshold = 0.5):
    num_images = len(images)
    predictions = []

    # Process in batches
    for start in range(0, num_images, batch_size):
        end = min(start + batch_size, num_images)
        batch_images = images[start:end]
        batch_predictions = model.predict(batch_images)
        print("predictions", batch_predictions)

        # Convert probabilities to binary labels based on threshold
        batch_labels = (batch_predictions >= threshold).astype(int)

        # Flatten the array and add to the list of predictions
        predictions.extend(batch_labels.flatten())
        print(f"Processed {end}/{num_images} images ({(end / num_images) * 100:.2f}% complete)")

    return np.array(predictions)

In [38]:
def get_model_scores(model, images, batch_size=32):
    num_images = len(images)
    scores = []
    for start in range(0, num_images, batch_size):
        end = min(start + batch_size, num_images)
        batch_images = images[start:end]
        batch_scores = model.predict(batch_images)
        scores.extend(batch_scores.flatten())

        print(f"Processed {end}/{num_images} images ({(end / num_images) * 100:.2f}% complete)")

    return np.array(scores)

In [39]:
def get_metrics(ground_truth, predictions):
    accuracy = accuracy_score(ground_truth, predictions)
    precision = precision_score(ground_truth, predictions)
    recall = recall_score(ground_truth, predictions)
    f1 = f1_score(ground_truth, predictions)
    return accuracy, precision, recall, f1

In [40]:
# Function to calculate batch-wise error rates
def batch_error_rates(model, images, labels, batch_size=32):
    batch_errors = []
    for start in range(0, len(images), batch_size):
        end = min(start + batch_size, len(images))
        batch_images = images[start:end]
        batch_labels = labels[start:end]

        # Get predictions for the batch
        batch_preds = model.predict(batch_images)
        batch_preds = (batch_preds >= 0.5).astype(int).flatten()

        # Calculate error rate for this batch
        incorrect = np.sum(batch_preds != batch_labels)
        error_rate = incorrect / len(batch_labels)
        batch_errors.append(error_rate)

    return batch_errors


In [41]:
images_ref, labels_ref, img_paths_ref = load_images_and_labels(ref_path, categories)

In [42]:
images, drifted_labels, image_paths = load_images_and_labels_with_new_interpretation(ref_path, categories, new_label_mapping)

In [43]:
""" images_new, labels_new, img_paths_new = load_images_and_labels(new_path, categories) """

' images_new, labels_new, img_paths_new = load_images_and_labels(new_path, categories) '

In [44]:
model = load_model('/Users/ximenamoure/Desktop/drift_last/models/mobilenet_v2_occupancy_tf216.keras')

In [45]:
split_name = "ref_split_target_shifted"

In [46]:
accuracy_drop_threshold = 0.1 # for a 10% decrease
f1_drop_threshold = 0.1 # for a 10% decrease

In [47]:
ws_threshold = 0.12

In [48]:
# --- Start MLflow Run ---
with mlflow.start_run() as run:
    mlflow.set_tag("mlflow.runName", "shift_in_target_variable")
    print("Processing and logging reference split...")
    images_ref, labels_ref, img_paths_ref = load_images_and_labels(ref_path, categories)
    predictions_ref = get_model_predictions(model, images_ref)
    batch_errors_ref = batch_error_rates(model, images_ref, labels_ref)
    accuracy_ref, precision_ref, recall_ref, f1_ref = get_metrics(labels_ref, predictions_ref)

    # Log reference metrics and batch errors as an artifact
    mlflow.log_metrics({
        "accuracy_ref": accuracy_ref,
        "precision_ref": precision_ref,
        "recall_ref": recall_ref,
        "f1_ref": f1_ref
    })
    images_new = images_ref
    labels_new = drifted_labels
    predictions_new = get_model_predictions(model, images_new)
    batch_errors_new = batch_error_rates(model, images_new, labels_new)
    accuracy_new, precision_new, recall_new, f1_new = get_metrics(labels_new, predictions_new)
    # Log new split metrics and error rates to MLflow
    mlflow.log_metrics({
            f"{split_name}_accuracy": accuracy_new,
            f"{split_name}_precision": precision_new,
            f"{split_name}_recall": recall_new,
            f"{split_name}_f1": f1_new
    })

    # --- Long-Term Drift Detection with Reference Split ---
    wd_ref = wasserstein_distance(batch_errors_ref, batch_errors_new)
    # Detect drift based on threshold
    drift_detected_ref = wd_ref > ws_threshold

    print("drfit_detected", drift_detected_ref)

    mlflow.log_metric(f"{split_name}_wasserstein_distance_ref", wd_ref)
    mlflow.log_metric(f"{split_name}_drift_detected_ref", int(drift_detected_ref))

    #--- Generate and Log Evidently Report ---
    reference_data = pd.DataFrame({'prediction': predictions_ref, 'target': labels_ref, 'dataset': 'reference'})
    new_data = pd.DataFrame({'prediction': predictions_new, 'target': labels_new, 'dataset': 'new'})
    classification_report = Report(metrics=[ClassificationPreset()])
    classification_report.run(reference_data=reference_data, current_data=new_data)

    # Save and log the Evidently report
    report_file = f"{split_name}_classification_report.html"
    classification_report.save_html(report_file)
    mlflow.log_artifact(report_file)

    results = classification_report.as_dict()
    current_metrics = results["metrics"][0]["result"]["current"]
    reference_metrics = results["metrics"][0]["result"]["reference"]
    accuracy_current = current_metrics["accuracy"]
    f1_current = current_metrics["f1"]
    recall_current = current_metrics["recall"]
    accuracy_reference = reference_metrics["accuracy"]
    f1_reference = reference_metrics["f1"]

    accuracy_significant_drop = (accuracy_reference - accuracy_current) > (accuracy_reference * accuracy_drop_threshold)
    f1_significant_drop = (f1_reference - f1_current) > (f1_reference * f1_drop_threshold)
    print("Significant Accuracy Drop:", accuracy_significant_drop)
    print("Significant F1 Score Drop:", f1_significant_drop)

    mlflow.log_metric("accuracy_significant_drop", int(accuracy_significant_drop))
    mlflow.log_metric("f1_significant_drop", int(f1_significant_drop))
    mlflow.log_metric(f"{split_name}_accuracy", accuracy_current)
    mlflow.log_metric(f"{split_name}_f1", f1_current)
    mlflow.log_metric(f"{split_name}_recall", recall_current)


    print("Run completed and all data logged to MLflow!")

Processing and logging reference split...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 516ms/step
predictions [[1.        ]
 [0.9999347 ]
 [0.9999981 ]
 [0.9999957 ]
 [1.        ]
 [0.99999744]
 [0.9997682 ]
 [1.        ]
 [1.        ]
 [0.9999985 ]
 [0.99998176]
 [1.        ]
 [1.        ]
 [0.9999979 ]
 [1.        ]
 [1.        ]
 [0.9999997 ]
 [0.99999976]
 [0.9999998 ]
 [0.9999974 ]
 [0.99999994]
 [0.99999964]
 [1.        ]
 [0.9999996 ]
 [0.99999964]
 [1.        ]
 [0.9999997 ]
 [0.9999881 ]
 [0.9999997 ]
 [1.        ]
 [1.        ]
 [0.9999964 ]]
Processed 32/48317 images (0.07% complete)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
predictions [[1.        ]
 [0.99999315]
 [0.9999996 ]
 [0.99972886]
 [0.9999918 ]
 [0.99999875]
 [1.        ]
 [1.        ]
 [1.        ]
 [0.9999956 ]
 [0.9999936 ]
 [0.9999996 ]
 [0.9999942 ]
 [0.9999999 ]
 [1.        ]
 [0.9999997 ]
 [0.9999988 ]
 [1.        ]
 [1.        ]
 [1.        ]
 [0.9994605 ]
 [0