In [None]:
###############################################################################
# 1) IMPORTS & SETUP
###############################################################################
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import GroupShuffleSplit
from tensorflow.keras.models import load_model

from mapie.classification import MapieClassifier
import joblib  # if you also want to load .pkl scikit-learn models

###############################################################################
# 2) KERAS WRAPPER FOR MAPIE (Classification)
###############################################################################
from sklearn.base import BaseEstimator, ClassifierMixin

class KerasClassifier(BaseEstimator, ClassifierMixin):
    """
    Wraps a pre-trained Keras model (.h5) so that it can be used
    by MAPIE's MapieClassifier in "prefit" mode.

    - .fit(X, y) loads the .h5 model and returns self (no actual re-training;
      X and y are ignored).
    - .predict_proba(X) returns the output of the Keras model's .predict(),
      expected to be an (n_samples, n_classes) array of probabilities.
    - .predict(X, **kwargs) returns the entry of classes_ with the highest
      score for each sample, ignoring extra args like 'prediction_type'.

    Parameters:
    - model_path: path of the saved model, handed to load_model() in fit().
    - n_classes: number of classes; classes_ is fixed to 0..n_classes-1.
    """
    def __init__(self, model_path, n_classes=11):
        self.model_path = model_path
        # Keep the constructor argument on the instance: BaseEstimator's
        # get_params() (used by repr() and sklearn.base.clone) reads every
        # __init__ parameter back by name and fails if it is missing.
        self.n_classes = n_classes
        self.model_ = None
        self.classes_ = np.arange(n_classes)  # default 0..10

    def fit(self, X, y, **kwargs):
        # Load the pre-trained CNN
        self.model_ = load_model(self.model_path)
        # If you want to set classes_ dynamically based on y:
        # self.classes_ = np.unique(y)  # but if you always expect 0..10, keep as is
        return self

    def _as_model_input(self, X):
        # When the network declares a 3-D input ending in a single channel,
        # e.g. (None, n_features, 1), and X is a 2-D feature table, append
        # the channel axis. Anything else is passed through untouched.
        expected = getattr(self.model_, "input_shape", None)
        wants_channel = (
            isinstance(expected, tuple)
            and len(expected) == 3
            and expected[-1] == 1
        )
        if wants_channel and np.ndim(X) == 2:
            return np.asarray(X)[..., np.newaxis]
        return X

    def predict_proba(self, X, **kwargs):
        if self.model_ is None:
            # Same exception type as before (attribute access on None),
            # but with a message that says what to do.
            raise AttributeError(
                "KerasClassifier has no model loaded; call fit() first."
            )
        return self.model_.predict(self._as_model_input(X))

    def predict(self, X, **kwargs):
        proba = self.predict_proba(X)
        max_idx = np.argmax(proba, axis=1)
        return self.classes_[max_idx]

###############################################################################
# 3) LOAD & SPLIT DATA
###############################################################################
def load_and_split_data(csv_path, test_size=0.2, random_state=42):
    """
    - Read the CSV at csv_path.
    - Label columns: every column whose lower-cased name contains 'slider'.
    - Feature columns: every column whose name starts with one of
      hrv / eda / acc / ibis / num_ibis.
    - Group-split by 'participant_id' (single GroupShuffleSplit) into a
      calibration part and a test part, so a participant's rows all land
      on the same side.
    - Return (df_cal, df_test, X_cal, X_test, label_cols): the two full
      row subsets (copies), their feature-only views, and the label names,
      so each label can be looked up later as needed.
    """
    frame = pd.read_csv(csv_path)

    label_cols = [name for name in frame.columns if 'slider' in name.lower()]
    feature_cols = frame.filter(regex='^(hrv|eda|acc|ibis|num_ibis)').columns.tolist()

    splitter = GroupShuffleSplit(
        n_splits=1,
        test_size=test_size,
        random_state=random_state,
    )
    # Only one split is requested, so take the first (and only) index pair.
    cal_rows, test_rows = next(
        splitter.split(frame, groups=frame['participant_id'].values)
    )

    df_cal = frame.iloc[cal_rows].copy()    # "calibration" set for MAPIE
    df_test = frame.iloc[test_rows].copy()  # "test" set

    return df_cal, df_test, df_cal[feature_cols], df_test[feature_cols], label_cols


###############################################################################
# 4) APPLY MAPIE WITH FALLBACK FOR DEGENERATE INTERVALS
###############################################################################
def apply_mapie_fallback(model, X_cal, y_cal, X_test, alpha=0.1):
    """
    - Uses MAPIE in prefit mode (the underlying model is not re-trained;
      X_cal / y_cal only calibrate the conformity scores).
    - For every test sample the conformal prediction set at level 1 - alpha
      is summarised as an interval over class labels: lower bound = smallest
      label in the set, upper bound = largest label in the set. This treats
      the class labels as ordered numbers.
    - Fallback for degenerate (empty) prediction sets: both bounds are NaN,
      so such a sample never counts as covered.
    - Returns (y_pred, lower_bounds, upper_bounds), each of length n_samples;
      the bounds are float arrays.
    """
    mapie_clf = MapieClassifier(
        estimator=model,
        method="score",
        cv="prefit",   # prefit => do not re-train underlying model
    )
    # Fit MAPIE (it won't re-train 'model', but it needs calibration data to measure errors)
    mapie_clf.fit(X_cal, y_cal)

    # No 'prediction_type' keyword: it is not a documented parameter of
    # MapieClassifier.predict, and the estimator wrappers ignore it anyway.
    y_pred, y_pis = mapie_clf.predict(X_test, alpha=[alpha])
    # shape y_pred: (n_samples,)
    # shape y_pis: (n_samples, n_classes, n_alpha) boolean membership mask

    print("[MAPIE] y_pred shape:", y_pred.shape)
    print("[MAPIE] y_pis shape:", y_pis.shape)

    y_pred = np.asarray(y_pred)
    pred_sets = np.asarray(y_pis)
    if pred_sets.ndim == 3:
        # A single alpha was requested, so keep its slice of the last axis.
        pred_sets = pred_sets[:, :, 0]
    pred_sets = pred_sets.astype(bool)

    # Mask out classes that are not in the set with +/- inf so that the
    # row-wise min / max only see the labels that are members.
    classes = np.asarray(mapie_clf.classes_, dtype=float)
    lower_bounds = np.where(pred_sets, classes, np.inf).min(axis=1)
    upper_bounds = np.where(pred_sets, classes, -np.inf).max(axis=1)

    empty = ~pred_sets.any(axis=1)
    if empty.any():
        print(f"Degenerate (empty) prediction sets for {int(empty.sum())} "
              "sample(s) -> bounds set to NaN.")
        lower_bounds[empty] = np.nan
        upper_bounds[empty] = np.nan

    return y_pred, lower_bounds, upper_bounds


###############################################################################
# 5) MAIN EVALUATION LOOP
###############################################################################
def evaluate_pretrained_models(csv_path, models_base_dir):
    """
    1) Load & group-split data
    2) For each label subfolder in 'models_base_dir', load the CNN .h5 model
       found in <models_base_dir>/<label>/cnn/ (first file in sorted order).
    3) Wrap with KerasClassifier, apply MAPIE in prefit mode using fallback intervals.
    4) Save intervals to results_mapie/<label>/cnn_intervals_<label>.csv and
       a summary of all processed labels to results_mapie/all_results.csv.

    Labels without a matching CSV column, without a 'cnn' subfolder, or
    without a .h5 file are skipped with a message. Returns None.
    """
    df_cal, df_test, X_cal, X_test, label_cols = load_and_split_data(csv_path)

    # For each label col, we assume there's a subfolder: models_base_dir/<label>/cnn/*.h5
    # Only the 'cnn' model type is handled here.
    results = []

    # sorted(): os.listdir() returns entries in arbitrary order, so sort to
    # make the processing order and the chosen model file reproducible.
    labels_found = sorted(
        d for d in os.listdir(models_base_dir)
        if os.path.isdir(os.path.join(models_base_dir, d))
    )

    for label_name in labels_found:
        # If this label doesn't appear in the data's label_cols, skip
        if label_name not in label_cols:
            print(f"[SKIP] {label_name} not found in CSV label columns.")
            continue

        # astype(int) raises if the column holds NaN / non-numeric values.
        y_cal = df_cal[label_name].astype(int)
        y_test = df_test[label_name].astype(int)

        # Build path to CNN folder
        cnn_path = os.path.join(models_base_dir, label_name, "cnn")
        if not os.path.isdir(cnn_path):
            print(f"[SKIP] No 'cnn' subfolder for label {label_name}")
            continue

        # Find the .h5 model file(s)
        model_files = sorted(
            f for f in os.listdir(cnn_path)
            if f.endswith(".h5") and os.path.isfile(os.path.join(cnn_path, f))
        )
        if not model_files:
            print(f"[SKIP] No .h5 model file found in {cnn_path} for label {label_name}")
            continue

        # Take the first .h5 in sorted order
        h5_file = model_files[0]
        full_h5_path = os.path.join(cnn_path, h5_file)

        print(f"\n=== Processing Label: {label_name}, Model: cnn, File: {h5_file} ===")
        # Wrap in KerasClassifier; 'fit' only loads the .h5
        wrapped_cnn = KerasClassifier(model_path=full_h5_path, n_classes=11)
        wrapped_cnn.fit(X_cal, y_cal)

        # Apply MAPIE
        y_pred, lower, upper = apply_mapie_fallback(wrapped_cnn, X_cal, y_cal, X_test, alpha=0.1)

        # Save intervals to a CSV
        out_dir = os.path.join("results_mapie", label_name)
        os.makedirs(out_dir, exist_ok=True)
        intervals_file = os.path.join(out_dir, f"cnn_intervals_{label_name}.csv")

        intervals_df = pd.DataFrame({
            "Index": np.arange(len(X_test)),
            # Plain array: every column is positional, no index alignment.
            "y_test": y_test.to_numpy(),
            "y_pred": y_pred,
            "Lower_Bound": lower,
            "Upper_Bound": upper,
        })
        intervals_df.to_csv(intervals_file, index=False)
        print(f"[INFO] Intervals saved to {intervals_file}")

        results.append({
            "label": label_name,
            "model_type": "cnn",
            "model_file": h5_file,
            "interval_csv": intervals_file,
        })

    # Summarize
    df_results = pd.DataFrame(results)
    summary_path = "results_mapie/all_results.csv"
    # The per-label makedirs above never runs when every label is skipped,
    # so make sure the summary directory exists before writing.
    os.makedirs(os.path.dirname(summary_path), exist_ok=True)
    df_results.to_csv(summary_path, index=False)
    print(f"\n=== All results saved to {summary_path} ===")


###############################################################################
# 6) RUN IT
###############################################################################
if __name__ == "__main__":
    # Dataset CSV that is loaded and group-split
    CSV_PATH = "cleaned_dataset_0307.csv"

    # Root folder of the trained models; scanned for one subfolder per label
    MODELS_BASE_DIR = "folds_original_models/fold_4/models"

    evaluate_pretrained_models(
        csv_path=CSV_PATH,
        models_base_dir=MODELS_BASE_DIR,
    )


In [None]:
###############################################################################
# 1) IMPORTS & SETUP
###############################################################################
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import GroupShuffleSplit
from tensorflow.keras.models import load_model
from mapie.classification import MapieClassifier

import joblib
from sklearn.base import BaseEstimator, ClassifierMixin

###############################################################################
# 2) WRAPPERS FOR KERAS & SKLEARN MODELS
###############################################################################
class KerasClassifier(BaseEstimator, ClassifierMixin):
    """
    Wraps a pre-trained Keras model (.h5) so that it can be used
    by MAPIE's MapieClassifier in "prefit" mode.

    - .fit(X, y) loads the .h5 model (no re-training; X and y are ignored).
    - .predict_proba(X) returns the output of the Keras model's .predict(),
      expected to be an (n_samples, n_classes) array.
    - .predict(X, **kwargs) returns the entry of classes_ with the highest
      score for each sample; extra keyword arguments are ignored.

    Parameters:
    - model_path: path of the saved model, handed to load_model() in fit().
    - n_classes: number of classes; classes_ is fixed to 0..n_classes-1.
    """
    def __init__(self, model_path, n_classes=11):
        self.model_path = model_path
        # Stored so that BaseEstimator.get_params() (used by repr() and
        # sklearn.base.clone) can read every __init__ parameter back.
        self.n_classes = n_classes
        self.model_ = None
        self.classes_ = np.arange(n_classes)  # default 0..10

    def fit(self, X, y, **kwargs):
        self.model_ = load_model(self.model_path)
        # Optionally: self.classes_ = np.unique(y)
        return self

    def _as_model_input(self, X):
        # Append a channel axis only when the network declares a 3-D input
        # ending in a single channel and X is a 2-D feature table; any
        # other input is handed to the model unchanged.
        expected = getattr(self.model_, "input_shape", None)
        wants_channel = (
            isinstance(expected, tuple)
            and len(expected) == 3
            and expected[-1] == 1
        )
        if wants_channel and np.ndim(X) == 2:
            return np.asarray(X)[..., np.newaxis]
        return X

    def predict_proba(self, X, **kwargs):
        if self.model_ is None:
            # Same exception type as the bare None access, clearer message.
            raise AttributeError(
                "KerasClassifier has no model loaded; call fit() first."
            )
        return self.model_.predict(self._as_model_input(X))

    def predict(self, X, **kwargs):
        proba = self.predict_proba(X)
        max_idx = np.argmax(proba, axis=1)
        return self.classes_[max_idx]


class SklearnClassifierWrapper(BaseEstimator, ClassifierMixin):
    """
    Thin adapter around an already-trained scikit-learn classifier.

    predict() and predict_proba() accept and drop any extra keyword
    arguments (such as 'prediction_type' coming from MAPIE) before
    delegating to the wrapped model.
    """
    def __init__(self, model):
        self.model = model
        self.classes_ = None

    def fit(self, X, y):
        # "Prefit": nothing is trained. Only classes_ is populated, taken
        # from the wrapped model when it exposes one, else derived from y.
        try:
            self.classes_ = self.model.classes_
        except AttributeError:
            self.classes_ = np.unique(y)
        return self

    def predict_proba(self, X, **kwargs):
        # kwargs are intentionally not forwarded.
        wrapped = self.model
        return wrapped.predict_proba(X)

    def predict(self, X, **kwargs):
        # kwargs are intentionally not forwarded.
        wrapped = self.model
        return wrapped.predict(X)

###############################################################################
# 3) LOAD & SPLIT DATA
###############################################################################
def load_and_split_data(csv_path, test_size=0.2, random_state=42):
    """
    Read the CSV and split its rows by participant.

    - Label columns: columns whose lower-cased name contains 'slider'.
    - Feature columns: columns whose name starts with hrv / eda / acc / num_ibis.
    - One GroupShuffleSplit on 'participant_id' yields a calibration part
      and a test part.

    Returns (df_cal, df_test, X_cal, X_test, label_cols).
    """
    table = pd.read_csv(csv_path)

    # Labels and features are picked purely by column-name pattern.
    label_cols = [col for col in table.columns if 'slider' in col.lower()]
    feature_cols = table.filter(regex='^(hrv|eda|acc|num_ibis)').columns.tolist()

    # Group-based split: rows of one participant stay together.
    group_ids = table['participant_id'].values
    splitter = GroupShuffleSplit(
        n_splits=1, test_size=test_size, random_state=random_state
    )
    cal_positions, test_positions = next(splitter.split(table, groups=group_ids))

    df_cal = table.iloc[cal_positions].copy()    # calibration set
    df_test = table.iloc[test_positions].copy()  # test set

    return df_cal, df_test, df_cal[feature_cols], df_test[feature_cols], label_cols

###############################################################################
# 4) APPLY MAPIE WITH FALLBACK FOR DEGENERATE INTERVALS
###############################################################################
def apply_mapie_fallback(model, X_cal, y_cal, X_test, alpha=0.1):
    """
    Uses MAPIE in "prefit" mode to get classification intervals.

    The underlying model is not re-trained; X_cal / y_cal only calibrate the
    conformity scores. Each test sample's conformal prediction set at level
    1 - alpha is summarised as an interval over class labels (smallest and
    largest label in the set), which treats the labels as ordered numbers.
    If a degenerate (empty) prediction set appears, both bounds fall back to
    NaN so the sample never counts as covered.

    Returns (y_pred, lower_bounds, upper_bounds), each of length n_samples;
    the bounds are float arrays.
    """
    mapie_clf = MapieClassifier(
        estimator=model,
        method="score",
        cv="prefit",  # do not retrain the underlying model
    )
    mapie_clf.fit(X_cal, y_cal)  # calibrate

    # Returns (y_pred, y_pred_sets). No 'prediction_type' keyword: it is not
    # a documented parameter of MapieClassifier.predict, and the estimator
    # wrappers ignore it anyway.
    y_pred, y_pis = mapie_clf.predict(X_test, alpha=[alpha])

    print("[MAPIE] y_pred shape:", y_pred.shape)
    print("[MAPIE] y_pis shape:", y_pis.shape)

    y_pred = np.asarray(y_pred)
    # y_pis is a boolean membership mask of shape (n_samples, n_classes, n_alpha)
    pred_sets = np.asarray(y_pis)
    if pred_sets.ndim == 3:
        # A single alpha was requested, so keep its slice of the last axis.
        pred_sets = pred_sets[:, :, 0]
    pred_sets = pred_sets.astype(bool)

    # Replace non-member classes by +/- inf so the row-wise min / max only
    # see labels that belong to the prediction set.
    classes = np.asarray(mapie_clf.classes_, dtype=float)
    lower_bounds = np.where(pred_sets, classes, np.inf).min(axis=1)
    upper_bounds = np.where(pred_sets, classes, -np.inf).max(axis=1)

    empty = ~pred_sets.any(axis=1)
    if empty.any():
        print(f"Degenerate (empty) prediction sets for {int(empty.sum())} "
              "sample(s) -> bounds set to NaN.")
        lower_bounds[empty] = np.nan
        upper_bounds[empty] = np.nan

    return y_pred, lower_bounds, upper_bounds

###############################################################################
# 5) MAIN EVALUATION LOOP (CNN & RF)
###############################################################################
def evaluate_pretrained_models(csv_path, models_base_dir):
    """
    1) Load & group-split data
    2) For each label subfolder in 'models_base_dir', look for subfolders:
         - 'cnn' with *.h5
         - 'random_forest' with *.pkl
       and pick the first matching file in sorted order.
    3) Load the model, wrap it, apply MAPIE in prefit mode, save intervals to
       results_mapie/<label>/<model_type>_intervals_<label>.csv.
    4) Write a summary of every processed model to results_mapie/all_results.csv.

    Labels or model types with nothing to load are skipped with a message.
    Returns None.
    """
    df_cal, df_test, X_cal, X_test, label_cols = load_and_split_data(csv_path)

    # Handled model types and the file extension of their saved model
    model_extensions = {"cnn": ".h5", "random_forest": ".pkl"}
    results = []

    # sorted(): os.listdir() returns entries in arbitrary order, so sort to
    # make the processing order and the chosen model file reproducible.
    labels_found = sorted(
        d for d in os.listdir(models_base_dir)
        if os.path.isdir(os.path.join(models_base_dir, d))
    )

    for label_name in labels_found:
        if label_name not in label_cols:
            print(f"[SKIP] {label_name} not found in CSV label columns.")
            continue

        # define y_cal, y_test for this label
        # (astype(int) raises if the column holds NaN / non-numeric values)
        y_cal = df_cal[label_name].astype(int)
        y_test = df_test[label_name].astype(int)

        # For each model type, check subfolder
        for mtype, extension in model_extensions.items():
            subfolder_path = os.path.join(models_base_dir, label_name, mtype)
            if not os.path.isdir(subfolder_path):
                print(f"[SKIP] No '{mtype}' subfolder for label {label_name}")
                continue

            # find model files (regular files only)
            model_files = sorted(
                f for f in os.listdir(subfolder_path)
                if f.endswith(extension)
                and os.path.isfile(os.path.join(subfolder_path, f))
            )
            if not model_files:
                print(f"[SKIP] No {mtype} model file found in {subfolder_path}")
                continue

            # take the first model file in sorted order
            model_file = model_files[0]
            full_model_path = os.path.join(subfolder_path, model_file)
            print(f"\n=== Processing Label: {label_name}, Model: {mtype}, File: {model_file} ===")

            # Load & wrap the model
            if mtype == "cnn":
                # Keras: the wrapper loads the .h5 in fit()
                wrapped_model = KerasClassifier(model_path=full_model_path, n_classes=11)
            else:
                # Random Forest.
                # NOTE(security): joblib.load unpickles the file, which can
                # execute arbitrary code; only load model files you trust.
                rf_model = joblib.load(full_model_path)
                wrapped_model = SklearnClassifierWrapper(rf_model)
            wrapped_model.fit(X_cal, y_cal)  # no training; loads model / sets classes_

            # Apply MAPIE
            y_pred, lower, upper = apply_mapie_fallback(wrapped_model, X_cal, y_cal, X_test, alpha=0.1)

            # Save intervals
            out_dir = os.path.join("results_mapie", label_name)
            os.makedirs(out_dir, exist_ok=True)
            intervals_file = os.path.join(out_dir, f"{mtype}_intervals_{label_name}.csv")

            intervals_df = pd.DataFrame({
                "Index": np.arange(len(X_test)),
                # Plain array: every column is positional, no index alignment.
                "y_test": y_test.to_numpy(),
                "y_pred": y_pred,
                "Lower_Bound": lower,
                "Upper_Bound": upper,
            })
            intervals_df.to_csv(intervals_file, index=False)
            print(f"[INFO] Intervals saved to {intervals_file}")

            results.append({
                "label": label_name,
                "model_type": mtype,
                "model_file": model_file,
                "interval_csv": intervals_file,
            })

    # Summarize all
    df_results = pd.DataFrame(results)
    summary_path = "results_mapie/all_results.csv"
    os.makedirs(os.path.dirname(summary_path), exist_ok=True)
    df_results.to_csv(summary_path, index=False)
    print(f"\n=== All results saved to {summary_path} ===")


###############################################################################
# 6) RUN IT
###############################################################################
if __name__ == "__main__":
    # Input CSV with the full dataset
    CSV_PATH = "cleaned_dataset_0307.csv"

    # Directory whose label subfolders hold the trained models
    MODELS_BASE_DIR = "folds_original_models/fold_4/models"

    evaluate_pretrained_models(
        csv_path=CSV_PATH,
        models_base_dir=MODELS_BASE_DIR,
    )


In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit
from tensorflow.keras.models import load_model
from sklearn.base import BaseEstimator, ClassifierMixin

###############################################################################
# KERAS WRAPPER FOR DIRECT PREDICTION
###############################################################################
class KerasClassifier(BaseEstimator, ClassifierMixin):
    """
    Wraps a pre-trained Keras model (.h5) so that it can be used
    for direct prediction (both class labels and probabilities).

    - fit(X, y) only loads the model from model_path; X and y are ignored.
    - predict_proba(X) returns the output of the Keras model's predict().
    - predict(X) returns the entry of classes_ with the highest score.

    Parameters:
    - model_path: path of the saved model, handed to load_model() in fit().
    - n_classes: number of classes; classes_ is fixed to 0..n_classes-1.
    """
    def __init__(self, model_path, n_classes=11):
        self.model_path = model_path
        # Stored so that BaseEstimator.get_params() (used by repr() and
        # sklearn.base.clone) can read every __init__ parameter back.
        self.n_classes = n_classes
        self.model_ = None
        self.classes_ = np.arange(n_classes)  # e.g. classes 0, 1, ..., 10

    def fit(self, X, y, **kwargs):
        # Load the pre-trained model (this is our "fit" step)
        self.model_ = load_model(self.model_path)
        return self

    def _as_model_input(self, X):
        # Append a channel axis only when the network declares a 3-D input
        # ending in a single channel and X is a 2-D feature table; any
        # other input is handed to the model unchanged.
        expected = getattr(self.model_, "input_shape", None)
        wants_channel = (
            isinstance(expected, tuple)
            and len(expected) == 3
            and expected[-1] == 1
        )
        if wants_channel and np.ndim(X) == 2:
            return np.asarray(X)[..., np.newaxis]
        return X

    def predict_proba(self, X, **kwargs):
        # Returns predicted probabilities for each class
        if self.model_ is None:
            # Same exception type as the bare None access, clearer message.
            raise AttributeError(
                "KerasClassifier has no model loaded; call fit() first."
            )
        return self.model_.predict(self._as_model_input(X))

    def predict(self, X, **kwargs):
        # Returns the class with highest predicted probability for each sample
        proba = self.predict_proba(X)
        max_idx = np.argmax(proba, axis=1)
        return self.classes_[max_idx]

###############################################################################
# FUNCTION TO LOAD AND SPLIT DATA
###############################################################################
def load_and_split_data(csv_path, test_size=0.2, random_state=42):
    """
    Read the CSV and produce a participant-wise calibration / test split.

    Feature columns are those whose name starts with hrv, eda, acc, ibis or
    num_ibis. A single GroupShuffleSplit keyed on 'participant_id' supplies
    the positional row indices of both parts.

    Returns (df, X_cal, X_test, train_idx, test_idx): the full frame, the
    feature rows of each part, and the positional indices of each part.
    """
    data = pd.read_csv(csv_path)

    # Features are selected purely by column-name prefix.
    feature_cols = data.filter(regex='^(hrv|eda|acc|ibis|num_ibis)').columns.tolist()

    # Rows of one participant always end up on the same side of the split.
    splitter = GroupShuffleSplit(
        n_splits=1, test_size=test_size, random_state=random_state
    )
    split_pairs = splitter.split(data, groups=data['participant_id'].values)
    cal_positions, test_positions = next(split_pairs)

    cal_features = data.iloc[cal_positions][feature_cols]
    test_features = data.iloc[test_positions][feature_cols]

    return data, cal_features, test_features, cal_positions, test_positions

###############################################################################
# MAIN SCRIPT: LOAD MODEL, PREDICT, AND SHOW RESULTS
###############################################################################
if __name__ == "__main__":
    # Paths to CSV and models directory (adjust as needed)
    CSV_PATH = "cleaned_dataset_0307.csv"
    MODELS_BASE_DIR = "folds_original_models/fold_4/models"

    # Load data and get feature splits
    df, X_cal, X_test, train_idx, test_idx = load_and_split_data(CSV_PATH)

    # Find label columns (those containing "slider" in their name)
    label_cols = [c for c in df.columns if 'slider' in c.lower()]
    if not label_cols:
        raise ValueError("No label columns found in the CSV.")

    # For this example, use the first found label column
    label_name = label_cols[0]
    print("Using label column:", label_name)

    # Get true labels for calibration and test sets
    # (astype(int) raises if the column holds NaN / non-numeric values)
    y_cal = df.iloc[train_idx][label_name].astype(int)
    y_test = df.iloc[test_idx][label_name].astype(int)

    # Build the expected CNN model directory for the chosen label.
    cnn_dir = os.path.join(MODELS_BASE_DIR, label_name, "cnn")
    if not os.path.isdir(cnn_dir):
        raise FileNotFoundError(f"Directory {cnn_dir} does not exist.")

    # Find the .h5 model files. os.listdir() order is arbitrary, so sort to
    # make "the first one" the same file on every run.
    model_files = sorted(f for f in os.listdir(cnn_dir) if f.endswith(".h5"))
    if not model_files:
        raise FileNotFoundError(f"No .h5 model found in {cnn_dir}")

    h5_file = model_files[0]
    model_path = os.path.join(cnn_dir, h5_file)
    print("Loading model from:", model_path)

    # Instantiate the KerasClassifier and load the pre-trained model
    # (fit() only loads the file; nothing is trained)
    model_wrapper = KerasClassifier(model_path=model_path, n_classes=11)
    model_wrapper.fit(X_cal, y_cal)

    # Direct prediction on the test set
    y_pred = model_wrapper.predict(X_test)
    y_proba = model_wrapper.predict_proba(X_test)

    # Display direct prediction results
    print("\n=== Direct Prediction Results ===")
    print("Predicted class indices for test set:")
    print(y_pred)

    print("\nPredicted probabilities for the first 5 test samples:")
    print(y_proba[:5])

    # Optionally, compare true vs predicted labels for the test set
    comparison = pd.DataFrame({
        "y_test": y_test,
        "y_pred": y_pred,
    })
    print("\nComparison of true and predicted labels (first 10 samples):")
    print(comparison.head(10))


In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import GroupShuffleSplit
from tensorflow.keras.models import load_model
from sklearn.base import BaseEstimator, ClassifierMixin

###############################################################################
# KERAS WRAPPER FOR DIRECT PREDICTION
###############################################################################
class KerasClassifier(BaseEstimator, ClassifierMixin):
    """
    Wraps a pre-trained Keras model (.h5) so that it can be used
    for direct prediction (both class labels and probabilities).

    - fit(X, y) only loads the model from model_path; X and y are ignored.
    - predict_proba(X) returns the output of the Keras model's predict().
      Input that already has a channel axis is passed through as is.
    - predict(X) returns the entry of classes_ with the highest score.

    Parameters:
    - model_path: path of the saved model, handed to load_model() in fit().
    - n_classes: number of classes; classes_ is fixed to 0..n_classes-1.
    """
    def __init__(self, model_path, n_classes=11):
        self.model_path = model_path
        # Stored so that BaseEstimator.get_params() (used by repr() and
        # sklearn.base.clone) can read every __init__ parameter back.
        self.n_classes = n_classes
        self.model_ = None
        self.classes_ = np.arange(n_classes)  # e.g. classes 0, 1, ..., 10

    def fit(self, X, y, **kwargs):
        # Load the pre-trained model (this is our "fit" step)
        self.model_ = load_model(self.model_path)
        return self

    def _as_model_input(self, X):
        # Append a channel axis only when the network declares a 3-D input
        # ending in a single channel and X is a 2-D feature table; any
        # other input (e.g. already reshaped 3-D data) is left unchanged.
        expected = getattr(self.model_, "input_shape", None)
        wants_channel = (
            isinstance(expected, tuple)
            and len(expected) == 3
            and expected[-1] == 1
        )
        if wants_channel and np.ndim(X) == 2:
            return np.asarray(X)[..., np.newaxis]
        return X

    def predict_proba(self, X, **kwargs):
        # Returns predicted probabilities for each class
        if self.model_ is None:
            # Same exception type as the bare None access, clearer message.
            raise AttributeError(
                "KerasClassifier has no model loaded; call fit() first."
            )
        return self.model_.predict(self._as_model_input(X))

    def predict(self, X, **kwargs):
        # Returns the class with highest predicted probability for each sample
        proba = self.predict_proba(X)
        max_idx = np.argmax(proba, axis=1)
        return self.classes_[max_idx]

###############################################################################
# FUNCTION TO LOAD AND SPLIT DATA
###############################################################################
def load_and_split_data(csv_path, test_size=0.2, random_state=42):
    """
    Load the CSV and split its rows into a calibration part and a test part.

    The split is a single GroupShuffleSplit on 'participant_id'. Feature
    columns are the ones whose name begins with hrv, eda, acc, ibis or
    num_ibis.

    Returns (df, X_cal, X_test, train_idx, test_idx), where the indices are
    positional row numbers into df.
    """
    full_frame = pd.read_csv(csv_path)

    # Pick features by column-name prefix.
    feature_cols = full_frame.filter(
        regex='^(hrv|eda|acc|ibis|num_ibis)'
    ).columns.tolist()

    # One grouped split; all rows of a participant go to the same part.
    participant_ids = full_frame['participant_id'].values
    grouped_split = GroupShuffleSplit(
        n_splits=1, test_size=test_size, random_state=random_state
    )
    cal_idx, held_out_idx = next(
        grouped_split.split(full_frame, groups=participant_ids)
    )

    return (
        full_frame,
        full_frame.iloc[cal_idx][feature_cols],
        full_frame.iloc[held_out_idx][feature_cols],
        cal_idx,
        held_out_idx,
    )

###############################################################################
# DEBUGGING FUNCTION (Adjusted for NumPy arrays)
###############################################################################
def debug_model(model_wrapper, X_cal, X_test, y_cal, y_test):
    """
    Print a diagnostic report for a loaded model wrapper.

    The report covers, in order: the shapes and first five rows of X_cal and
    X_test, the wrapped model's summary(), the head of the true-vs-predicted
    labels for both sets, the first five rows of predict_proba(X_test), and
    the per-column min / max / mean of those probabilities (a quick check
    for constant outputs). Nothing is returned.

    model_wrapper must provide .model_.summary(), .predict() and
    .predict_proba(); X_cal / X_test must support .shape and [:5] slicing.
    """
    # Shapes and a peek at the raw inputs
    print("=== Data Information ===")
    print("Calibration set shape:", X_cal.shape)
    print("Test set shape:", X_test.shape)
    for heading, samples in (
        ("\nCalibration set (first 5 rows):", X_cal),
        ("\nTest set (first 5 rows):", X_test),
    ):
        print(heading)
        print(samples[:5])

    # Architecture of the wrapped model
    print("\n=== Model Summary ===")
    model_wrapper.model_.summary()

    # Both prediction passes run before either table is printed.
    cal_hat = model_wrapper.predict(X_cal)
    test_hat = model_wrapper.predict(X_test)

    tables = (
        ("\n=== Predictions on Calibration Set ===",
         {"y_cal": y_cal, "y_pred_cal": cal_hat}),
        ("\n=== Predictions on Test Set ===",
         {"y_test": y_test, "y_pred_test": test_hat}),
    )
    for heading, columns in tables:
        print(heading)
        print(pd.DataFrame(columns).head())

    # Do the predicted probabilities vary across samples?
    test_scores = model_wrapper.predict_proba(X_test)
    print("\n=== Predicted Probabilities (first 5 test samples) ===")
    print(test_scores[:5])

    # Column-wise statistics; identical min and max means a constant output.
    print("\n=== Statistics of Predicted Probabilities ===")
    for caption, reducer in (
        ("Min values across samples:", "min"),
        ("Max values across samples:", "max"),
        ("Mean values across samples:", "mean"),
    ):
        print(caption, getattr(test_scores, reducer)(axis=0))

###############################################################################
# MAIN SCRIPT: LOAD MODEL, PREDICT, AND DEBUG
###############################################################################
if __name__ == "__main__":
    # Paths to CSV and models directory (adjust as needed)
    CSV_PATH = "cleaned_dataset_0307.csv"
    MODELS_BASE_DIR = "folds_original_models/fold_4/models"

    # Load data and get feature splits
    df, X_cal_df, X_test_df, train_idx, test_idx = load_and_split_data(CSV_PATH)

    # Find label columns (those containing "slider" in their name)
    label_cols = [c for c in df.columns if 'slider' in c.lower()]
    if not label_cols:
        raise ValueError("No label columns found in the CSV.")

    # For this example, use the first found label column
    label_name = label_cols[0]
    print("Using label column:", label_name)

    # Get true labels for calibration and test sets
    # (astype(int) raises if the column holds NaN / non-numeric values)
    y_cal = df.iloc[train_idx][label_name].astype(int)
    y_test = df.iloc[test_idx][label_name].astype(int)

    # Build the expected CNN model directory for the chosen label.
    cnn_dir = os.path.join(MODELS_BASE_DIR, label_name, "cnn")
    if not os.path.isdir(cnn_dir):
        raise FileNotFoundError(f"Directory {cnn_dir} does not exist.")

    # Find the .h5 model files. os.listdir() order is arbitrary, so sort to
    # make "the first one" the same file on every run.
    model_files = sorted(f for f in os.listdir(cnn_dir) if f.endswith(".h5"))
    if not model_files:
        raise FileNotFoundError(f"No .h5 model found in {cnn_dir}")

    h5_file = model_files[0]
    model_path = os.path.join(cnn_dir, h5_file)
    print("Loading model from:", model_path)

    # Instantiate the KerasClassifier and load the pre-trained model
    # (fit() only loads the file; nothing is trained)
    model_wrapper = KerasClassifier(model_path=model_path, n_classes=11)
    model_wrapper.fit(X_cal_df, y_cal)

    # --- IMPORTANT: Reshape input data to add a channel dimension ---
    # Convert DataFrames to NumPy arrays of shape (n_samples, n_features, 1):
    print("\nBefore reshape, X_test shape:", X_test_df.shape)
    X_cal = X_cal_df.to_numpy().reshape(-1, X_cal_df.shape[1], 1)
    X_test = X_test_df.to_numpy().reshape(-1, X_test_df.shape[1], 1)
    print("After reshape, X_test shape:", X_test.shape)

    # Direct prediction on the reshaped test set
    y_pred = model_wrapper.predict(X_test)
    y_proba = model_wrapper.predict_proba(X_test)

    # Display direct prediction results
    print("\n=== Direct Prediction Results ===")
    print("Predicted class indices for test set:")
    print(y_pred)

    print("\nPredicted probabilities for the first 5 test samples:")
    print(y_proba[:5])

    # Compare true vs predicted labels for the test set
    print("\nComparison of true and predicted labels (first 10 samples):")
    comparison = pd.DataFrame({
        "y_test": y_test,
        "y_pred": y_pred,
    })
    print(comparison.head(10))

    # Run additional debugging outputs on the reshaped data
    debug_model(model_wrapper, X_cal, X_test, y_cal, y_test)
