# Prediction and Evaluation

This notebook handles model evaluation and prediction, including:
- Model loading
- Making predictions
- Performance metrics calculation
- Visualization of results
- Error analysis

In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras import models
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    accuracy_score, 
    precision_recall_fscore_support
)
import cv2

# Set random seed for reproducibility: NumPy's and TensorFlow's global
# random number generators are both seeded with the same fixed value.
np.random.seed(42)
tf.random.set_seed(42)

## 1. Load Trained Model

In [None]:
def load_model_and_labels(model_dir='../models'):
    """
    Read the saved classifier and its class-label list from disk.

    The two artifacts are loaded independently. When either one fails to
    load, the error is printed and ``None`` is returned in its place, so
    callers should check both elements of the result.

    Args:
        model_dir (str): Directory containing model artifacts

    Returns:
        tuple: (model, class_labels); either element may be None
    """
    artifacts_root = Path(model_dir)
    model_path = artifacts_root / 'satellite_classifier.keras'
    labels_path = artifacts_root / 'class_labels.csv'

    # Model: stays None unless loading succeeds
    model = None
    try:
        model = models.load_model(model_path)
    except Exception as load_error:
        print(f"Error loading model: {load_error}")

    # Class labels: the CSV is read without a header row and the labels
    # are taken from its first column
    class_labels = None
    try:
        labels_frame = pd.read_csv(labels_path, header=None)
        class_labels = labels_frame[0].tolist()
    except Exception as load_error:
        print(f"Error loading class labels: {load_error}")

    return model, class_labels

# Load model and labels
model, class_labels = load_model_and_labels()

# load_model_and_labels() prints the error and returns None for an artifact
# it could not load, so only report success when a model actually came back.
if model is None:
    print("Model could not be loaded; see the error message above")
else:
    print("Model loaded successfully")
print("Classes:", class_labels)

## 2. Load Test Data

In [None]:
def load_test_data(data_dir='../processed_data', num_classes=None):
    """
    Load preprocessed test data

    Reads 'X_test.npy' and 'y_test.npy' from ``data_dir`` and builds a
    one-hot encoding of the labels.

    Args:
        data_dir (str): Directory containing preprocessed data
        num_classes (int, optional): Number of columns in the one-hot
            encoding. When None (the default) Keras infers it from the
            largest label present, which produces too few columns if the
            test split happens not to contain the highest class index.
            Pass the real class count to get a fixed width.

    Returns:
        tuple: Test images, test labels, one-hot encoded labels
    """
    data_dir = Path(data_dir)

    # Load test data
    X_test = np.load(data_dir / 'X_test.npy')
    y_test_orig = np.load(data_dir / 'y_test.npy')

    # One-hot encode labels; num_classes=None keeps the original
    # infer-from-data behaviour
    y_test_one_hot = tf.keras.utils.to_categorical(
        y_test_orig, num_classes=num_classes
    )

    return X_test, y_test_orig, y_test_one_hot

# Load test data (load_test_data is called with its default data directory)
X_test, y_test_orig, y_test_one_hot = load_test_data()

# Quick sanity check of what was loaded: the image array's shape and the
# distinct label values present in the test set
print("Test data shape:", X_test.shape)
print("Unique test labels:", np.unique(y_test_orig))

## 3. Make Predictions

In [None]:
def make_predictions(model, X_test):
    """
    Generate predictions on test data

    Args:
        model (tf.keras.Model): Trained classification model; any object
            exposing ``predict(X)`` that returns one row of class scores
            per sample works
        X_test (np.ndarray): Test image data

    Returns:
        tuple: Predicted probabilities, predicted classes (the index of
        the highest score in each row)

    Raises:
        ValueError: If ``model`` is None
    """
    # load_model_and_labels() returns None instead of raising when the model
    # file cannot be read; fail here with a clear message rather than an
    # AttributeError on ``None.predict``.
    if model is None:
        raise ValueError(
            "model is None; the trained model must be loaded before predicting"
        )

    # Generate prediction probabilities
    y_pred_proba = model.predict(X_test)

    # Convert probabilities to class predictions
    y_pred_classes = np.argmax(y_pred_proba, axis=1)

    return y_pred_proba, y_pred_classes

# Run the loaded model over the whole test set
y_pred_proba, y_pred_classes = make_predictions(model=model, X_test=X_test)

# Summarize the predictions: array shape and the set of classes predicted
print("Prediction shape:", y_pred_classes.shape)
print("Unique predicted classes:", np.unique(y_pred_classes))

## 4. Calculate Performance Metrics

In [None]:
def calculate_performance_metrics(y_true, y_pred, class_labels):
    """
    Calculate comprehensive performance metrics

    Prints overall accuracy, weighted precision/recall/F1, and the
    per-class classification report. Returns nothing.

    Args:
        y_true (np.ndarray): Ground truth labels
        y_pred (np.ndarray): Predicted labels
        class_labels (list): List of class labels, or None to let
            scikit-learn derive the classes from the data. Assumes labels
            in y_true / y_pred are integer indices into this list.
    """
    # Pin the report to every known class. Without an explicit ``labels``
    # list, classification_report raises when a class is absent from both
    # y_true and y_pred, because target_names no longer matches in length.
    if class_labels is None or len(class_labels) == 0:
        label_ids = None
        target_names = None
    else:
        label_ids = list(range(len(class_labels)))
        # The report formats names as strings; labels read from CSV may not be
        target_names = [str(name) for name in class_labels]

    # Overall accuracy
    accuracy = accuracy_score(y_true, y_pred)
    print(f"Overall Accuracy: {accuracy * 100:.2f}%")

    # Precision, Recall, F1-Score
    # zero_division=0 yields the same values as the default but without a
    # warning for classes that were never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted', zero_division=0
    )

    print("\nWeighted Metrics:")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")

    # Detailed classification report
    print("\nDetailed Classification Report:")
    print(classification_report(
        y_true, y_pred,
        labels=label_ids,
        target_names=target_names,
        zero_division=0
    ))

# Report accuracy, weighted metrics and the per-class report for the test set
calculate_performance_metrics(
    y_true=y_test_orig,
    y_pred=y_pred_classes,
    class_labels=class_labels,
)

## 5. Visualize Results

In [None]:
def visualize_confusion_matrix(y_true, y_pred, class_labels):
    """
    Create and plot confusion matrix

    The matrix is computed over every class in ``class_labels`` so that its
    rows and columns always line up with the tick labels, even when a class
    is missing from both y_true and y_pred.

    Args:
        y_true (np.ndarray): Ground truth labels
        y_pred (np.ndarray): Predicted labels
        class_labels (list): List of class labels, or None to fall back to
            the classes found in the data with automatic tick labels.
            Assumes labels in y_true / y_pred are integer indices into
            this list.
    """
    if class_labels is None or len(class_labels) == 0:
        label_ids = None
        tick_labels = 'auto'
    else:
        label_ids = list(range(len(class_labels)))
        tick_labels = class_labels

    # Compute confusion matrix with a fixed class order
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=tick_labels,
        yticklabels=tick_labels
    )
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.tight_layout()
    plt.show()

def visualize_sample_predictions(X_test, y_test_orig, y_pred_classes, class_labels, num_samples=9):
    """
    Visualize sample predictions

    Shows the first ``num_samples`` test images in a three-column grid,
    each titled with its true and predicted class. Titles are green when
    the prediction is correct and red otherwise.

    Args:
        X_test (np.ndarray): Test images
        y_test_orig (np.ndarray): True labels (integer indices into
            class_labels)
        y_pred_classes (np.ndarray): Predicted labels (integer indices
            into class_labels)
        class_labels (list): List of class labels
        num_samples (int): Number of samples to visualize; clamped to the
            number of samples available
    """
    # Never index past the end of any of the input arrays
    available = min(len(X_test), len(y_test_orig), len(y_pred_classes))
    n_shown = max(0, min(num_samples, available))
    if n_shown == 0:
        print("No samples to visualize")
        return

    # Size the grid from the sample count instead of assuming 3x3, so
    # num_samples > 9 no longer overflows the subplot grid. The default
    # of 9 samples still yields a 3x3 grid on a 15x10 figure.
    n_cols = 3
    n_rows = (n_shown + n_cols - 1) // n_cols
    plt.figure(figsize=(15, 10 * n_rows / 3))

    for i in range(n_shown):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(X_test[i])

        # int() so label arrays stored with a non-integer dtype still
        # work as list indices
        true_idx = int(y_test_orig[i])
        pred_idx = int(y_pred_classes[i])
        true_label = class_labels[true_idx]
        pred_label = class_labels[pred_idx]

        # Compare class indices rather than display names
        color = 'green' if true_idx == pred_idx else 'red'
        title = f"True: {true_label}\nPred: {pred_label}"

        plt.title(title, color=color)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Plot the confusion matrix, then a grid of sample test images with their
# true and predicted classes
visualize_confusion_matrix(
    y_true=y_test_orig,
    y_pred=y_pred_classes,
    class_labels=class_labels,
)
visualize_sample_predictions(
    X_test=X_test,
    y_test_orig=y_test_orig,
    y_pred_classes=y_pred_classes,
    class_labels=class_labels,
)

## 6. Error Analysis

In [None]:
def perform_error_analysis(y_true, y_pred, y_pred_proba, class_labels):
    """
    Conduct detailed error analysis

    Prints the number of misclassified samples, a true-vs-predicted
    cross-tabulation restricted to the misclassified samples, and summary
    statistics of the model's confidence (highest class score) on them.
    Returns nothing.

    Args:
        y_true (np.ndarray): Ground truth labels
        y_pred (np.ndarray): Predicted labels
        y_pred_proba (np.ndarray): Prediction probabilities, one row per
            sample
        class_labels (list): List of class labels. When given and every
            misclassified label is a valid index into it, the
            cross-tabulation shows class names instead of raw indices.
    """
    # Accept plain sequences as well as arrays
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    y_pred_proba = np.asarray(y_pred_proba)

    # Identify misclassified samples
    misclassified_mask = y_true != y_pred
    num_errors = int(np.count_nonzero(misclassified_mask))

    # Print error summary
    print("Error Analysis:")
    print(f"Total Misclassified Samples: {num_errors} / {len(y_true)}")

    # With no errors the statistics below are undefined: min() of an empty
    # array raises ValueError and the mean is NaN.
    if num_errors == 0:
        print("\nNo misclassified samples; nothing further to analyze.")
        return

    true_wrong = y_true[misclassified_mask]
    pred_wrong = y_pred[misclassified_mask]

    # Show class names when the labels can be used as indices into
    # class_labels; otherwise keep the raw label values.
    if class_labels is not None and len(class_labels) > 0:
        names = np.asarray(class_labels, dtype=object)
        true_idx = true_wrong.astype(int)
        pred_idx = pred_wrong.astype(int)
        lowest = min(true_idx.min(), pred_idx.min())
        highest = max(true_idx.max(), pred_idx.max())
        if lowest >= 0 and highest < len(names):
            true_wrong = names[true_idx]
            pred_wrong = names[pred_idx]

    # Detailed misclassification breakdown
    misclassification_matrix = pd.crosstab(
        pd.Series(true_wrong, name='True Label'),
        pd.Series(pred_wrong, name='Predicted Label')
    )

    print("\nMisclassification Matrix:")
    print(misclassification_matrix)

    # Confidence analysis of misclassifications
    misclassified_confidences = y_pred_proba[misclassified_mask].max(axis=1)
    print("\nMisclassification Confidence Statistics:")
    print(f"Mean Confidence: {misclassified_confidences.mean():.4f}")
    print(f"Median Confidence: {np.median(misclassified_confidences):.4f}")
    print(f"Min Confidence: {misclassified_confidences.min():.4f}")

# Break down the misclassified test samples and the model's confidence on them
perform_error_analysis(
    y_true=y_test_orig,
    y_pred=y_pred_classes,
    y_pred_proba=y_pred_proba,
    class_labels=class_labels,
)