In [None]:
# ─────────────────────────────────────────────────────────────
# 1. Import Required Libraries
# ─────────────────────────────────────────────────────────────
import os
import joblib
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# --- Filter warnings for a cleaner output ---
# NOTE: with no category or module argument this silences *every* warning for
# the rest of the kernel session, including ones raised by the libraries
# imported above. Comment this line out while debugging unexpected results.
warnings.filterwarnings('ignore')

In [None]:
# ─────────────────────────────────────────────────────────────
# 2. Configuration Block
# ─────────────────────────────────────────────────────────────
# --- Define file paths ---
# The project root defaults to the original checkout location. Set the
# SPRAY_VISION_ROOT environment variable to run this notebook from another
# machine or directory without editing the cell (an empty value is ignored).
BASE_PATH = os.environ.get("SPRAY_VISION_ROOT") or r"d:\GitHub Repos\spray-vision"
DATA_PATH = os.path.join(BASE_PATH, "data", "processed", "preprocessed_dataset.csv")
MODELS_DIR = os.path.join(BASE_PATH, "models")
OUTPUT_DIR = os.path.join(BASE_PATH, "outputs")

# --- Define model and feature names ---
# These are the column names *after* load_and_prepare_data renames the raw CSV
# headers. The order of TARGETS matters: evaluate_performance and
# plot_true_vs_predicted pair TARGETS[i] with column i of each prediction array.
INPUTS = ["time", "chamb_pressure", "cham_temp", "injection_pres", "density", "viscosity"]
TARGETS = ["angle_mie", "length_mie", "angle_shadow", "length_shadow"]
RANDOM_STATE = 42

# --- Plotting Style ---
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (20, 15)
plt.rcParams['font.size'] = 12

In [None]:
# ─────────────────────────────────────────────────────────────
# 3. Data Loading and Preparation Functions
# ─────────────────────────────────────────────────────────────
def load_and_prepare_data(data_path, inputs, targets, test_size=0.2, random_state=42):
    """Load the dataset, normalize its column names, and split it into train/test.

    Args:
        data_path: Path of the CSV file to read.
        inputs: Column names (after renaming) used as model features.
        targets: Column names (after renaming) used as regression targets.
        test_size: Fraction of rows assigned to the test split.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        ``(df, X_train, X_test, y_train, y_test)`` where ``df`` is the full
        renamed frame. If ``data_path`` does not exist, an error is printed and
        a tuple of five ``None`` values is returned instead.

    Raises:
        KeyError: If any of ``inputs``, ``targets`` or the ``run`` column is
            absent after renaming. The message lists every missing column.
    """
    try:
        raw_df = pd.read_csv(data_path)
    except FileNotFoundError:
        print(f"Error: Data file not found at {data_path}")
        return None, None, None, None, None

    # Raw CSV header -> short name used throughout the notebook.
    rename_map = {
        "Time_ms": "time", "Pc_bar": "chamb_pressure", "Tc_K": "cham_temp",
        "Pinj_bar": "injection_pres", "rho_kgm3": "density", "mu_Pas": "viscosity",
        "angle_shadow_deg": "angle_shadow", "len_shadow_L_D": "length_shadow",
        "angle_mie_deg": "angle_mie", "len_mie_L_D": "length_mie",
    }
    df = raw_df.rename(columns=rename_map)

    # Validate up front so a schema problem is reported once, with every
    # missing column named, instead of failing on the first bad lookup.
    required = list(inputs) + list(targets) + ["run"]
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(
            f"Dataset at {data_path} is missing required column(s): {missing}"
        )

    X = df[inputs]
    y = df[targets]
    runs = df["run"]  # the split is stratified on this column

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=runs
    )

    print("✓ Data loaded and split successfully.")
    print(f"  Training set shape: X={X_train.shape}, y={y_train.shape}")
    print(f"  Test set shape:     X={X_test.shape}, y={y_test.shape}")

    return df, X_train, X_test, y_train, y_test

In [None]:
# ─────────────────────────────────────────────────────────────
# 4. Model Loading Functions - Scikit-learn Models
# ─────────────────────────────────────────────────────────────
def load_sklearn_models(models_dir):
    """Load every known scikit-learn regressor stored as a .joblib file.

    Args:
        models_dir: Directory that holds the serialized model files.

    Returns:
        Dict mapping display name -> loaded model. Models whose file is not
        found are reported on stdout and left out of the dict.
    """
    # Display name -> file name expected inside ``models_dir``.
    model_files = {
        'Linear Regression': 'LinearRegression_regressor.joblib',
        'Decision Tree': 'DecisionTree_regressor.joblib',
        'Random Forest': 'RandomForest_regressor.joblib',
        'Gradient Boosting': 'GradientBoosting_regressor.joblib',
        'SVR': 'SVR_regressor.joblib',
        'KNN': 'KNN_regressor.joblib'
    }

    print("\nLoading scikit-learn models...")
    available = {}
    for label, filename in model_files.items():
        try:
            estimator = joblib.load(os.path.join(models_dir, filename))
        except FileNotFoundError:
            print(f"  ✗ {label}: Model file not found.")
        else:
            available[label] = estimator
            print(f"  ✓ {label}")
    return available

In [None]:
# ─────────────────────────────────────────────────────────────
# 4. Model Loading Functions - ANN Model
# ─────────────────────────────────────────────────────────────
def load_ann_model(models_dir):
    """Load the first available Keras ANN together with its two scalers.

    Candidate model files are tried in priority order; the first one that
    loads along with both scaler files wins.

    Args:
        models_dir: Directory that holds the .h5 model and .joblib scaler files.

    Returns:
        ``({display_name: model}, input_scaler, target_scaler)`` on success,
        or ``({}, None, None)`` when no candidate could be loaded.
    """
    print("\nLoading ANN model...")

    # (display name, file name), most preferred first.
    candidates = (
        ('ANN (Improved)', 'ANN_improved_regressor.h5'),
        ('ANN', 'ANN_regressor.h5'),
    )

    for label, filename in candidates:
        try:
            # compile=False skips restoring the saved training configuration,
            # which avoids deserialization errors; the model is then compiled
            # again with a standard optimizer and loss.
            network = keras.models.load_model(
                os.path.join(models_dir, filename), compile=False
            )
            network.compile(optimizer='adam', loss='mse')
            print(f"  ✓ {label} loaded successfully.")

            # Both candidates share the same pair of scaler files.
            scaler_in = joblib.load(os.path.join(models_dir, 'ann_input_scaler.joblib'))
            scaler_out = joblib.load(os.path.join(models_dir, 'ann_target_scaler.joblib'))
            print("  ✓ ANN scalers loaded.")

            return {label: network}, scaler_in, scaler_out
        except OSError:
            # IOError and FileNotFoundError are both OSError in Python 3.
            print(f"  ! {label} or its scalers not found. Trying next...")
        except Exception as err:
            print(f"  ✗ An error occurred loading {label}: {err}")

    print("  ✗ All ANN models failed to load.")
    return {}, None, None

In [None]:
# ─────────────────────────────────────────────────────────────
# 5. Prediction and Evaluation Functions - Generate Predictions
# ─────────────────────────────────────────────────────────────
def generate_predictions(X_test, sklearn_models, ann_model_dict, ann_input_scaler, ann_target_scaler):
    """Run every loaded model on the test features.

    Args:
        X_test: Feature matrix passed to each model.
        sklearn_models: Dict of name -> model exposing ``predict(X)``.
        ann_model_dict: Dict holding the ANN (only its first entry is used);
            may be empty.
        ann_input_scaler: Scaler applied to ``X_test`` before the ANN predicts.
        ann_target_scaler: Scaler whose ``inverse_transform`` maps the ANN
            output back to the original target scale.

    Returns:
        Dict mapping model name -> prediction array.
    """
    # The scikit-learn models take the features as-is.
    outputs = {
        label: estimator.predict(X_test)
        for label, estimator in sklearn_models.items()
    }

    # The ANN works in scaled space on both sides.
    if ann_model_dict:
        label, network = next(iter(ann_model_dict.items()))
        scaled_inputs = ann_input_scaler.transform(X_test)
        scaled_outputs = network.predict(scaled_inputs, verbose=0)
        outputs[label] = ann_target_scaler.inverse_transform(scaled_outputs)

    print(f"\n✓ Predictions generated for {len(outputs)} models.")
    return outputs

In [None]:
# ─────────────────────────────────────────────────────────────
# 5. Prediction and Evaluation Functions - Evaluate Performance
# ─────────────────────────────────────────────────────────────
def evaluate_performance(y_true, predictions, targets):
    """Compute R², MAE and RMSE per model and target, plus a per-model average.

    Args:
        y_true: DataFrame of true values with one column per entry in ``targets``.
        predictions: Dict of model name -> array-like of predictions whose
            column ``i`` corresponds to ``targets[i]``. A 1-D array is treated
            as a single column.
        targets: Target column names, in prediction-column order.

    Returns:
        DataFrame with columns ``Model``, ``Target``, ``R²``, ``MAE``, ``RMSE``.
        It holds one row per (model, target) pair plus one ``'Overall (Avg)'``
        row per model (the unweighted mean over targets), sorted by ``Target``
        ascending and then ``R²`` descending. If there is nothing to evaluate,
        an empty frame with those columns is returned.
    """
    metric_columns = ['Model', 'Target', 'R²', 'MAE', 'RMSE']

    results = []
    for model_name, y_pred in predictions.items():
        # Accept DataFrames/lists as well as arrays, and single-target models
        # that return a flat vector.
        y_pred = np.asarray(y_pred)
        if y_pred.ndim == 1:
            y_pred = y_pred.reshape(-1, 1)

        for i, target in enumerate(targets):
            r2 = r2_score(y_true[target], y_pred[:, i])
            mae = mean_absolute_error(y_true[target], y_pred[:, i])
            rmse = np.sqrt(mean_squared_error(y_true[target], y_pred[:, i]))
            results.append({
                'Model': model_name,
                'Target': target,
                'R²': r2,
                'MAE': mae,
                'RMSE': rmse
            })

    # With no rows the frame would have no 'Model' column and the groupby
    # below would raise KeyError, so return a well-formed empty result.
    if not results:
        return pd.DataFrame(columns=metric_columns)

    # Calculate overall average metrics for each model
    metrics_df = pd.DataFrame(results)
    overall_metrics = metrics_df.groupby('Model')[['R²', 'MAE', 'RMSE']].mean().reset_index()
    overall_metrics['Target'] = 'Overall (Avg)'

    # Combine and sort
    summary_df = pd.concat([metrics_df, overall_metrics]).sort_values(
        by=['Target', 'R²'], ascending=[True, False]
    ).reset_index(drop=True)

    return summary_df

In [None]:
# ─────────────────────────────────────────────────────────────
# 6. Visualization Function
# ─────────────────────────────────────────────────────────────
def plot_true_vs_predicted(y_true, predictions, targets, metrics_df, output_dir):
    """Create, save and show one grid of True-vs-Predicted scatter plots per target.

    Each figure has two rows and one panel per model. All panels of a figure
    share the same axis limits and carry a dashed y = x reference line.

    Args:
        y_true: DataFrame of true values with one column per entry in ``targets``.
        predictions: Dict of model name -> 2-D prediction array whose column
            ``i`` corresponds to ``targets[i]``.
        targets: Target column names, in prediction-column order.
        metrics_df: Frame with ``Model``, ``Target``, ``R²`` and ``MAE`` columns
            containing a row for every (model, target) pair being plotted.
        output_dir: Directory (created if missing) that receives
            ``true_vs_predicted_<target>.png`` files.

    Returns:
        None. If ``predictions`` is empty, a notice is printed and nothing is
        drawn or written.
    """
    n_models = len(predictions)
    if n_models == 0:
        # plt.subplots rejects a grid with zero columns, so bail out early.
        print("  ! No predictions available; skipping plots.")
        return

    colors = sns.color_palette("husl", n_models)
    model_colors = dict(zip(predictions.keys(), colors))
    n_cols = (n_models + 1) // 2  # two rows, rounded up

    os.makedirs(output_dir, exist_ok=True)

    for i, target in enumerate(targets):
        fig, axes = plt.subplots(2, n_cols, figsize=(20, 10), constrained_layout=True)
        axes = axes.flatten()
        fig.suptitle(f'True vs. Predicted Values for: {target.replace("_", " ").title()}', fontsize=20, fontweight='bold')

        y_true_target = y_true[target]
        min_val = y_true_target.min()
        max_val = y_true_target.max()

        for j, (model_name, y_pred) in enumerate(predictions.items()):
            ax = axes[j]
            y_pred_target = y_pred[:, i]

            # Widen the shared range so every model's points fit on every panel.
            min_val = min(min_val, y_pred_target.min())
            max_val = max(max_val, y_pred_target.max())

            # Plot data
            ax.scatter(y_true_target, y_pred_target, alpha=0.6, s=50, color=model_colors[model_name], edgecolors='k', lw=0.5)

            # Metrics text
            metrics = metrics_df[(metrics_df['Model'] == model_name) & (metrics_df['Target'] == target)].iloc[0]
            r2, mae = metrics['R²'], metrics['MAE']
            ax.set_title(f'{model_name}\nR²={r2:.3f} | MAE={mae:.3f}', fontsize=12, fontweight='bold')
            ax.set_xlabel('True Values', fontsize=10)
            ax.set_ylabel('Predicted Values', fontsize=10)
            ax.grid(True, which='both', linestyle='--', linewidth=0.5)

        # Apply the final shared limits and the y = x reference line.
        range_pad = (max_val - min_val) * 0.1
        lower, upper = min_val - range_pad, max_val + range_pad
        for ax in axes[:n_models]:  # Only format used axes
            ax.plot([lower, upper], [lower, upper], 'r--', lw=2)
            ax.set_xlim(lower, upper)
            ax.set_ylim(lower, upper)
            ax.set_aspect('equal')

        # Hide unused subplots (odd model counts leave one empty cell).
        for unused_ax in axes[n_models:]:
            unused_ax.set_visible(False)

        plot_filename = os.path.join(output_dir, f'true_vs_predicted_{target}.png')
        # Save through the figure object rather than pyplot's "current figure".
        fig.savefig(plot_filename, dpi=300, bbox_inches='tight')
        print(f"  ✓ Plot saved to: {plot_filename}")
        plt.show()
        # Release the figure so one is not kept alive per target.
        plt.close(fig)

In [None]:
# ─────────────────────────────────────────────────────────────
# 7. Main Execution Block - Load Data
# ─────────────────────────────────────────────────────────────
# --- Load Data ---
# full_df is the complete renamed frame; the other four are the train/test split.
full_df, X_train, X_test, y_train, y_test = load_and_prepare_data(
    data_path=DATA_PATH,
    inputs=INPUTS,
    targets=TARGETS,
    random_state=RANDOM_STATE,
)

In [None]:
# ─────────────────────────────────────────────────────────────
# 7. Main Execution Block - Load Models
# ─────────────────────────────────────────────────────────────
# --- Load Models ---
# Models that are missing on disk are reported and skipped by the loaders.
sklearn_models = load_sklearn_models(models_dir=MODELS_DIR)
(ann_model_dict,
 ann_input_scaler,
 ann_target_scaler) = load_ann_model(models_dir=MODELS_DIR)

In [None]:
# ─────────────────────────────────────────────────────────────
# 7. Main Execution Block - Generate Predictions
# ─────────────────────────────────────────────────────────────
# --- Generate Predictions ---
# Result: dict of model name -> prediction array for the test split.
all_predictions = generate_predictions(
    X_test=X_test,
    sklearn_models=sklearn_models,
    ann_model_dict=ann_model_dict,
    ann_input_scaler=ann_input_scaler,
    ann_target_scaler=ann_target_scaler,
)

In [None]:
# ─────────────────────────────────────────────────────────────
# 7. Main Execution Block - Evaluate Performance and Display Results
# ─────────────────────────────────────────────────────────────
# --- Evaluate Performance ---
if all_predictions:
    metrics_summary_df = evaluate_performance(y_test, all_predictions, TARGETS)

    print("\n" + "="*80)
    print("MODEL PERFORMANCE SUMMARY (Sorted by Overall R²)")
    print("="*80)

    # Display overall results sorted by R²
    overall_df = metrics_summary_df[metrics_summary_df['Target'] == 'Overall (Avg)'].sort_values('R²', ascending=False)
    print(overall_df.to_string(index=False))

    # Save metrics to CSV. This cell runs before the plotting cell, so the
    # output directory may not exist yet on a fresh checkout; create it here.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    metrics_filename = os.path.join(OUTPUT_DIR, "detailed_model_metrics.csv")
    metrics_summary_df.to_csv(metrics_filename, index=False)
    print(f"\n✓ Detailed metrics saved to: {metrics_filename}")
else:
    print("\n⚠️ No predictions were generated. Cannot evaluate performance.")

In [None]:
# ─────────────────────────────────────────────────────────────
# 7. Main Execution Block - Create and Save Visualizations
# ─────────────────────────────────────────────────────────────
# --- Create and Save Visualizations ---
# metrics_summary_df comes from the evaluation cell, which defines it under
# the same `all_predictions` condition checked here.
if not all_predictions:
    print("\n⚠️ No models were loaded. Cannot generate predictions or plots.")
else:
    print("\nGenerating and saving plots...")
    plot_true_vs_predicted(
        y_test,
        all_predictions,
        TARGETS,
        metrics_summary_df,
        OUTPUT_DIR,
    )

    print("\n✅ Analysis complete!")