In [8]:
import os
import pandas as pd
import numpy as np
import joblib
import torch
import torch.nn as nn
from sklearn.metrics import confusion_matrix

# Directory layout: fold CSVs and fitted transformers live in data_dir, trained
# models in model_dir, and the confusion matrices produced here go to output_dir.
data_dir = "5fold_splits_pca"
model_dir = "trained_models_pca"
output_dir = "confusion_matrix_data"
os.makedirs(output_dir, exist_ok=True)

# Locate the fitted PCA and scaler artifacts and fail early if either is missing.
pca_path, scaler_path = (
    os.path.join(data_dir, artifact) for artifact in ("pca_model.pkl", "scaler.pkl")
)
if not (os.path.exists(pca_path) and os.path.exists(scaler_path)):
    raise FileNotFoundError("PCA model or Scaler not found. Ensure they are generated and saved properly.")

pca, scaler = joblib.load(pca_path), joblib.load(scaler_path)

# Regression targets; one confusion matrix is produced per target and model.
target_aspects = [
    "Positive_Emotions",
    "Negative_Emotions",
    "Self_Esteem",
    "Meaning_in_Life",
    "Social_Support",
]

# Equal-width bins used to discretise continuous scores
# (assuming scores range 0-10, adjust if needed).
num_bins = 10
bin_edges = np.linspace(0, 10, num=num_bins + 1)

# Define CNN model (must match saved structure)
class CNNRegressor(nn.Module):
    """1-D convolutional regressor producing one scalar per sample.

    Two length-preserving convolutions (kernel 3, padding 1) are followed by
    two fully connected layers. Attribute names are part of the saved
    ``state_dict`` layout and must not be renamed.

    Args:
        input_dim: Number of features per sample (the length of the 1-D signal).
    """

    def __init__(self, input_dim):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, padding=1)
        # conv2 emits 32 channels of length input_dim, flattened before fc1.
        self.fc1 = nn.Linear(32 * input_dim, 64)
        self.fc2 = nn.Linear(64, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a ``(batch, 1, input_dim)`` tensor to ``(batch, 1)`` predictions."""
        feature_maps = self.relu(self.conv2(self.relu(self.conv1(x))))
        flattened = feature_maps.view(feature_maps.shape[0], -1)
        hidden = self.relu(self.fc1(flattened))
        return self.fc2(hidden)

# Iterate through models and folds: pool predictions over all test folds for
# each (target, model) pair and save one binned confusion matrix per pair.


def _bin_scores(values, edges, n_bins):
    """Return a bin index in ``[0, n_bins - 1]`` for every score in *values*.

    ``np.digitize(...) - 1`` on its own maps a score equal to the top edge to
    ``n_bins`` and a score below the bottom edge to ``-1``. Neither index is
    among the labels given to ``confusion_matrix``, so those samples would be
    silently dropped. Clipping folds them into the last/first bin instead.
    """
    return np.clip(np.digitize(values, bins=edges) - 1, 0, n_bins - 1)


def _load_predictor(model_name, target, input_dim):
    """Load the saved model for *model_name*/*target* and return a predict callable.

    Args:
        model_name: ``"CNN"`` or the file-name prefix of a joblib-saved model.
        target: Target column name; part of the model file name.
        input_dim: Number of feature columns (only used to build the CNN).

    Returns:
        A callable that takes the feature DataFrame and returns predictions.
    """
    if model_name == "CNN":
        net = CNNRegressor(input_dim)
        # map_location="cpu": inference below runs on CPU (``.numpy()``), so a
        # checkpoint saved from a GPU must be remapped on load.
        state = torch.load(os.path.join(model_dir, f"CNN_{target}.pt"), map_location="cpu")
        net.load_state_dict(state)
        net.eval()

        def predict_cnn(features):
            # Conv1d expects (batch, channels, length); add the channel axis.
            batch = torch.tensor(features.values, dtype=torch.float32).unsqueeze(1)
            with torch.no_grad():
                return net(batch).numpy().flatten()

        return predict_cnn

    estimator = joblib.load(os.path.join(model_dir, f"{model_name}_{target}.joblib"))
    return estimator.predict


# The fold CSVs do not depend on target or model, so read each one only once.
fold_frames = [
    pd.read_csv(os.path.join(data_dir, f"test_fold_{fold}_pca.csv"))
    for fold in range(5)
]

for target in target_aspects:
    for model_name in ["RandomForest", "XGBoost", "CNN"]:
        predict = None  # loaded lazily: the CNN needs the feature count first
        true_chunks = []
        pred_chunks = []

        for test_df in fold_frames:
            # Feature columns are the principal components ("PC..." columns).
            feature_cols = [col for col in test_df.columns if col.startswith("PC")]
            X_test_df = test_df[feature_cols]

            # The model file is the same for every fold, so load it only once.
            if predict is None:
                predict = _load_predictor(model_name, target, len(feature_cols))

            true_chunks.append(test_df[target].to_numpy(dtype=float))
            pred_chunks.append(np.asarray(predict(X_test_df), dtype=float).ravel())

        true_values = np.concatenate(true_chunks)
        pred_values = np.concatenate(pred_chunks)

        # Non-finite scores cannot be binned meaningfully; leave those pairs
        # out (they were never counted before either).
        finite = np.isfinite(true_values) & np.isfinite(pred_values)

        # Convert to bins
        true_bins = _bin_scores(true_values[finite], bin_edges, num_bins)
        pred_bins = _bin_scores(pred_values[finite], bin_edges, num_bins)

        # Generate confusion matrix over the full label set so every matrix
        # has the same num_bins x num_bins shape.
        cm = confusion_matrix(true_bins, pred_bins, labels=np.arange(num_bins))

        # Save as DataFrame
        cm_df = pd.DataFrame(
            cm,
            index=[f"True_{i}" for i in range(num_bins)],
            columns=[f"Pred_{i}" for i in range(num_bins)],
        )
        cm_file = os.path.join(output_dir, f"confusion_matrix_{model_name}_{target}.csv")
        cm_df.to_csv(cm_file)

        print(f"Saved confusion matrix for {model_name} - {target} at {cm_file}")


Saved confusion matrix for RandomForest - Positive_Emotions at confusion_matrix_data/confusion_matrix_RandomForest_Positive_Emotions.csv
Saved confusion matrix for XGBoost - Positive_Emotions at confusion_matrix_data/confusion_matrix_XGBoost_Positive_Emotions.csv



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



Saved confusion matrix for CNN - Positive_Emotions at confusion_matrix_data/confusion_matrix_CNN_Positive_Emotions.csv
Saved confusion matrix for RandomForest - Negative_Emotions at confusion_matrix_data/confusion_matrix_RandomForest_Negative_Emotions.csv



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



Saved confusion matrix for XGBoost - Negative_Emotions at confusion_matrix_data/confusion_matrix_XGBoost_Negative_Emotions.csv
Saved confusion matrix for CNN - Negative_Emotions at confusion_matrix_data/confusion_matrix_CNN_Negative_Emotions.csv
Saved confusion matrix for RandomForest - Self_Esteem at confusion_matrix_data/confusion_matrix_RandomForest_Self_Esteem.csv



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



Saved confusion matrix for XGBoost - Self_Esteem at confusion_matrix_data/confusion_matrix_XGBoost_Self_Esteem.csv
Saved confusion matrix for CNN - Self_Esteem at confusion_matrix_data/confusion_matrix_CNN_Self_Esteem.csv
Saved confusion matrix for RandomForest - Meaning_in_Life at confusion_matrix_data/confusion_matrix_RandomForest_Meaning_in_Life.csv



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



Saved confusion matrix for XGBoost - Meaning_in_Life at confusion_matrix_data/confusion_matrix_XGBoost_Meaning_in_Life.csv
Saved confusion matrix for CNN - Meaning_in_Life at confusion_matrix_data/confusion_matrix_CNN_Meaning_in_Life.csv
Saved confusion matrix for RandomForest - Social_Support at confusion_matrix_data/confusion_matrix_RandomForest_Social_Support.csv



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"



Saved confusion matrix for XGBoost - Social_Support at confusion_matrix_data/confusion_matrix_XGBoost_Social_Support.csv
Saved confusion matrix for CNN - Social_Support at confusion_matrix_data/confusion_matrix_CNN_Social_Support.csv
