In [None]:
import torch
import os
from pathlib import Path
import os
import numpy as np
from tqdm import tqdm
from PIL import Image
import glob
from ultralytics import YOLO
import yaml
import matplotlib.pyplot as plt

In [None]:
# Root folder of the project on disk. Set the MIA_PROJECT_ROOT environment
# variable to run the notebook from another location; the default keeps the
# layout the notebook was originally written against.
PROJECT_ROOT = os.environ.get(
    "MIA_PROJECT_ROOT",
    "/Users/jaydenma/Documents/mathematical image analysis/mia final project",
)
DATASET_ROOT = os.path.join(PROJECT_ROOT, "Cars Detection")

# Weights file of the model evaluated in this notebook.
model_path = os.path.join(PROJECT_ROOT, "models", "yolo11m_car1", "weights", "best.pt")

# Image and label folders of the train and test splits (kept as plain strings
# because the cells below pass them to os.path.join and glob).
train_data_path = os.path.join(DATASET_ROOT, "train", "images")
train_labels_path = os.path.join(DATASET_ROOT, "train", "labels")
test_data_path = os.path.join(DATASET_ROOT, "test", "images")
test_labels_path = os.path.join(DATASET_ROOT, "test", "labels")

# Dataset description file; the class names are read from it further down.
data_yaml_path = os.path.join(DATASET_ROOT, "data.yaml")

## Load YOLO models

### Control model

In [None]:
# Build the detector from the weights file configured in `model_path`.
model = YOLO(model_path)

# Report which weights were loaded and the task the loaded model reports.
print(
    f"Model loaded from: {model_path}",
    f"Model task: {model.task}",
    sep="\n",
)

## Compute precision

### Load class mapping and helper functions

In [None]:
# Load class names from the YAML file
with open(data_yaml_path, 'r') as f:
    data_config = yaml.safe_load(f)

# `names` can be written either as a list (position == class id) or as a
# mapping {class_id: name}. The helpers below index `class_names` by integer
# class id AND iterate over it expecting name strings, which only works for a
# list, so convert the mapping form to a list ordered by class id.
# NOTE(review): the mapping branch assumes ids are 0..n-1 with no gaps - confirm
# against the dataset if it ever uses that form.
raw_names = data_config['names']
if isinstance(raw_names, dict):
    class_names = [raw_names[class_id] for class_id in sorted(raw_names)]
else:
    class_names = list(raw_names)

print(f"Class names: {class_names}")

# Function to read ground truth labels
def read_label_file(label_path):
    """Read a YOLO-format label file and return the objects it lists.

    A line is used only when it has exactly five whitespace-separated fields
    (class id, x_center, y_center, width, height); lines with any other field
    count are ignored. Only the class id is kept - the box coordinates are not
    returned.

    Args:
        label_path: Path to the ``.txt`` label file.

    Returns:
        list[dict]: One ``{'class_id': int, 'class_name': str}`` entry per
        usable line, in file order. The name is looked up in the notebook-level
        ``class_names``; ids outside ``0 .. len(class_names) - 1`` are named
        ``"Unknown_<id>"``. If the file cannot be opened or read, the error is
        printed and whatever was collected so far (possibly nothing) is
        returned.
    """
    gt_objects = []
    try:
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) != 5:  # class, x_center, y_center, width, height
                    continue
                try:
                    class_id = int(parts[0])
                except ValueError as e:
                    # A single malformed class id should not discard the
                    # remaining, valid lines of the file.
                    print(f"Error reading label file {label_path}: {e}")
                    continue
                # Check the lower bound as well: a negative id would otherwise
                # be accepted and silently index from the end of class_names.
                if 0 <= class_id < len(class_names):
                    class_name = class_names[class_id]
                else:
                    class_name = f"Unknown_{class_id}"
                gt_objects.append({
                    'class_id': class_id,
                    'class_name': class_name
                })
    except Exception as e:
        # Best effort: an unreadable label file is reported, not fatal.
        print(f"Error reading label file {label_path}: {e}")
    return gt_objects

# Function to compute class-based accuracy
def compute_class_accuracy(predicted_classes, gt_classes):
    """Tally image-level TP / FP / FN per class for a single image.

    Both inputs are reduced to sets, so only the presence of a class matters,
    not how many times it occurs. For every entry of the notebook-level
    ``class_names``:

    * predicted and in the ground truth -> one 'TP'
    * predicted but not in the ground truth -> one 'FP'
    * in the ground truth but not predicted -> one 'FN'
    * in neither -> nothing is counted

    Args:
        predicted_classes: Iterable of predicted class names.
        gt_classes: Iterable of ground-truth class names.

    Returns:
        dict: ``{class_name: {'TP': int, 'FP': int, 'FN': int}}`` with one
        entry per name in ``class_names``.
    """
    detected = frozenset(predicted_classes)
    expected = frozenset(gt_classes)

    # (was predicted, is in ground truth) -> counter to bump
    outcome_for = {
        (True, True): 'TP',
        (True, False): 'FP',
        (False, True): 'FN',
    }

    tallies = {}
    for label in class_names:
        tallies[label] = {'TP': 0, 'FP': 0, 'FN': 0}

    for label in class_names:
        outcome = outcome_for.get((label in detected, label in expected))
        if outcome is not None:
            tallies[label][outcome] += 1

    return tallies

### Evaluate on original test images

In [None]:
def evaluate_precision_at_threshold(model, test_images_path, ground_truth_path, class_names, confidence_threshold=0.25):
    """
    Evaluate image-level model precision at a specified confidence threshold.

    For every ``*.jpg`` / ``*.png`` in ``test_images_path`` the model is run,
    detections scoring above ``confidence_threshold`` are reduced to their class
    names, and ``compute_class_accuracy`` compares them with the classes listed
    in the matching ``<image stem>.txt`` label file. That comparison is
    per image and per class (is the class present or not); box positions and
    counts are not taken into account.

    Note that the two helpers used here (``read_label_file`` and
    ``compute_class_accuracy``) read the notebook-level ``class_names``, so the
    ``class_names`` argument should be that same list.

    Args:
        model: YOLO model to evaluate
        test_images_path: Path to the directory containing test images
        ground_truth_path: Path to the directory containing ground truth labels
        class_names: List of class names
        confidence_threshold: Confidence threshold for predictions (default: 0.25)

    Returns:
        tuple: (class_precision, macro_precision) where:
            - class_precision is a dictionary mapping class names to precision
              values (0 for a class that was never predicted)
            - macro_precision is the average precision across all classes
              (0 when ``class_names`` is empty)
    """
    # Create dictionaries to store metrics
    overall_metrics = {name: {'TP': 0, 'FP': 0, 'FN': 0} for name in class_names}

    image_paths = (glob.glob(os.path.join(test_images_path, '*.jpg'))
                   + glob.glob(os.path.join(test_images_path, '*.png')))

    # (image path, exception) for every image that could not be scored
    failures = []

    for img_path in image_paths:
        try:
            base_name = os.path.splitext(os.path.basename(img_path))[0]

            # Ground truth: class names listed in the label file of this image
            label_path = os.path.join(ground_truth_path, base_name + '.txt')
            gt_classes = [obj['class_name'] for obj in read_label_file(label_path)]

            # The model call applies its own confidence cut-off (0.25 by default
            # in Ultralytics), so the requested threshold has to be passed
            # through; otherwise thresholds below that default would return
            # exactly the same detections. verbose=False keeps the per-image
            # log lines out of the notebook output.
            with Image.open(img_path) as img:
                prediction = model(img, conf=confidence_threshold, verbose=False)

            # Keep the class name of every detection above the threshold
            predicted_classes = []
            if len(prediction) > 0 and hasattr(prediction[0], 'boxes'):
                for box in prediction[0].boxes:
                    if float(box.conf) > confidence_threshold:
                        predicted_classes.append(prediction[0].names[int(box.cls)])

            # Image-level TP/FP/FN per class, folded into the running totals
            image_metrics = compute_class_accuracy(predicted_classes, gt_classes)
            for cls in class_names:
                for counter in ('TP', 'FP', 'FN'):
                    overall_metrics[cls][counter] += image_metrics[cls][counter]

        except Exception as e:
            # Keep going, but remember the failure so it can be reported:
            # silently skipped images would make the precision look valid
            # even when nothing was actually evaluated.
            failures.append((img_path, e))

    if failures:
        first_path, first_error = failures[0]
        print(f"Warning: skipped {len(failures)} of {len(image_paths)} images "
              f"at confidence threshold {confidence_threshold}; "
              f"first error ({first_path}): {first_error!r}")

    # Calculate precision for each class
    class_precision = {}
    for cls in class_names:
        tp = overall_metrics[cls]['TP']
        fp = overall_metrics[cls]['FP']
        class_precision[cls] = tp / (tp + fp) if (tp + fp) > 0 else 0

    # Calculate macro-averaged precision (guarded so an empty class list does
    # not raise ZeroDivisionError)
    macro_precision = sum(class_precision.values()) / len(class_precision) if class_precision else 0

    return class_precision, macro_precision

In [None]:

# Score the model on the original test set at the 0.25 confidence cut-off.
# `precision` is the (per-class precision dict, macro precision) pair returned
# by the evaluator.
precision = evaluate_precision_at_threshold(
    model,
    test_data_path,
    test_labels_path,
    class_names,
    confidence_threshold=0.25,
)

# Per-class table: class name padded to 15 characters, precision to 10
print("\nPrecision at 0.25 confidence threshold:")
print(f"{'Class':<15} {'Precision':<10}")
for cls, prec in precision[0].items():
    print(f"{cls:<15} {prec:<10.2f}")

# Macro average on its own line below its caption
print("\nMacro-averaged precision:", f"{precision[1]:.4f}", sep="\n")

## Evaluate precision at different confidence thresholds

In [None]:
# Confidence cut-offs to sweep over
confidence_thresholds = [0.1, 0.25, 0.5, 0.75, 0.9]

# Maps each threshold to the per-class and macro-averaged precision measured at it
threshold_results = {}

for threshold in tqdm(confidence_thresholds, desc="Evaluating thresholds"):
    # One full pass over the test set per threshold
    class_precision, macro_precision = evaluate_precision_at_threshold(
        model,
        test_data_path,
        test_labels_path,
        class_names,
        confidence_threshold=threshold,
    )

    threshold_results[threshold] = dict(
        class_precision=class_precision,
        macro_precision=macro_precision,
    )

    # Progress report for the threshold that just finished
    print(
        f"\nPrecision at {threshold:.2f} confidence threshold:",
        f"Macro-averaged precision: {macro_precision:.4f}",
        sep="\n",
    )

## Visualize relationship between confidence threshold and precision

In [None]:
# Extract threshold and precision values for plotting
thresholds = list(threshold_results.keys())
macro_precisions = [result['macro_precision'] for result in threshold_results.values()]

# Class-wise precision series: one value per threshold, in the order of `thresholds`
class_precisions = {
    cls: [result['class_precision'][cls] for result in threshold_results.values()]
    for cls in class_names
}

# Two stacked panels: macro precision on top, per-class precision below
fig, (ax_macro, ax_class) = plt.subplots(2, 1, figsize=(12, 10))

# Plot 1: Overall macro-precision vs confidence threshold
ax_macro.plot(thresholds, macro_precisions, 'o-', linewidth=2, markersize=8)
ax_macro.set_xlabel('Confidence Threshold')
ax_macro.set_ylabel('Macro-averaged Precision')
ax_macro.set_title('Relationship Between Confidence Threshold and Precision')
ax_macro.grid(True, linestyle='--', alpha=0.7)
ax_macro.set_xticks(thresholds)
ax_macro.set_ylim([0, 1.05])

# Annotate points with precision values
for point_threshold, point_precision in zip(thresholds, macro_precisions):
    ax_macro.annotate(f"{point_precision:.4f}",
                      (point_threshold, point_precision),
                      textcoords="offset points",
                      xytext=(0, 10),
                      ha='center')

# Plot 2: Class-wise precision vs confidence threshold
for cls in class_names:
    ax_class.plot(thresholds, class_precisions[cls], 'o-', linewidth=2, label=cls)

ax_class.set_xlabel('Confidence Threshold')
ax_class.set_ylabel('Precision')
ax_class.set_title('Class-wise Precision vs Confidence Threshold')
ax_class.grid(True, linestyle='--', alpha=0.7)
ax_class.set_xticks(thresholds)
ax_class.set_ylim([0, 1.05])
ax_class.legend(loc='lower right')

fig.tight_layout()
plt.show()


def _format_table_row(label, cells):
    """Left-align a row: 12-character label column, then 10-character cells."""
    return " ".join([f"{label:<12}"] + [f"{cell:<10}" for cell in cells])


# Create a table showing the precision values for each class and threshold.
# Rows are built from however many thresholds were evaluated, and headers use
# two decimals because one decimal mislabels 0.25 and 0.75 ("0.2" and "0.8").
table_header = _format_table_row("Class", [f"Conf {t:.2f}" for t in thresholds])
table_rule = "-" * len(table_header)

print("\nPrecision Values by Class and Confidence Threshold:")
print(table_header)
print(table_rule)

for cls in class_names:
    print(_format_table_row(cls, [f"{value:.4f}" for value in class_precisions[cls]]))

print(table_rule)
print(_format_table_row("Macro-avg", [f"{value:.4f}" for value in macro_precisions]))

## Testing augmented images

In [None]:
# Define the path for augmented test images: the `images_augmented` folder that
# sits next to the test image folder. Deriving it from `test_data_path` keeps
# it in step with the configured dataset location instead of repeating an
# absolute path (normpath drops a trailing slash so dirname returns the parent).
augmented_test_path = os.path.join(
    os.path.dirname(os.path.normpath(test_data_path)),
    'images_augmented',
)

# Create lists to store results
augmented_results = []          # one record per successfully processed image
augmented_total_images = 0      # number of successfully processed images
augmented_overall_metrics = {name: {'TP': 0, 'FP': 0, 'FN': 0} for name in class_names}

# Get mapping from augmented filename to original filename for ground truth lookup
def get_original_filename(aug_filename):
    """Return the base name of the image an augmented file was derived from.

    The extension is dropped and then everything from the last underscore
    onwards is removed; augmented files are expected to be named
    ``<original_name>_<filter>.<ext>``. A name without any underscore is
    returned unchanged (minus its extension).

    Args:
        aug_filename: File name of the augmented image (no directory part).

    Returns:
        str: The original image's name without extension.
    """
    stem, _extension = os.path.splitext(aug_filename)
    head, separator, _filter_suffix = stem.rpartition('_')
    # rpartition yields an empty separator when there is no underscore at all;
    # in that case there is no suffix to strip.
    return head if separator else stem

# Process each image in the augmented test directory
AUGMENTED_CONF_THRESHOLD = 0.25  # only detections scoring above this are counted

augmented_image_paths = (glob.glob(os.path.join(augmented_test_path, '*.jpg')) +
                         glob.glob(os.path.join(augmented_test_path, '*.png')))

for img_path in tqdm(augmented_image_paths):
    try:
        img_filename = os.path.basename(img_path)

        # Augmented images reuse the label file of the image they were derived
        # from, so look it up in the test label folder. (`ground_truth_path`
        # only exists as a parameter of evaluate_precision_at_threshold; using
        # it here raised NameError for every image.)
        original_base = get_original_filename(img_filename)
        label_path = os.path.join(test_labels_path, original_base + '.txt')

        # Load ground truth data
        gt_objects = read_label_file(label_path)
        gt_classes = [obj['class_name'] for obj in gt_objects]

        # Run inference; the threshold is passed explicitly so the model's own
        # cut-off matches the filter below, and verbose=False keeps per-image
        # logs from interleaving with the progress bar.
        with Image.open(img_path) as img:
            prediction = model(img, conf=AUGMENTED_CONF_THRESHOLD, verbose=False)

        # Process predictions
        predicted_classes = []
        detections = []

        if len(prediction) > 0 and hasattr(prediction[0], 'boxes'):
            for box in prediction[0].boxes:
                conf = float(box.conf)
                if conf <= AUGMENTED_CONF_THRESHOLD:
                    continue

                cls_id = int(box.cls)
                cls_name = prediction[0].names[cls_id]
                # Box corners as a flat list, whichever container xyxy is
                coords = box.xyxy.tolist()[0] if hasattr(box.xyxy, 'tolist') else box.xyxy[0].tolist()

                predicted_classes.append(cls_name)
                detections.append({
                    'confidence': conf,
                    'class_id': cls_id,
                    'class_name': cls_name,
                    'box': coords
                })

        # Compute class-based accuracy for this image
        image_metrics = compute_class_accuracy(predicted_classes, gt_classes)

        # Update overall metrics
        for cls in class_names:
            augmented_overall_metrics[cls]['TP'] += image_metrics[cls]['TP']
            augmented_overall_metrics[cls]['FP'] += image_metrics[cls]['FP']
            augmented_overall_metrics[cls]['FN'] += image_metrics[cls]['FN']

        # Store the result for this image
        augmented_results.append({
            'image_path': img_path,
            'ground_truth': gt_classes,
            'predictions': predicted_classes,
            'detections': detections,
            'metrics': image_metrics
        })

        augmented_total_images += 1

    except Exception as e:
        print(f"Error processing {img_path}: {e}")

if augmented_total_images == 0:
    # Everything below would be all zeros; make the reason visible.
    print(f"Warning: no augmented images were processed from {augmented_test_path}")

# Calculate overall metrics for augmented images
augmented_class_precision = {}
augmented_class_recall = {}
augmented_class_f1 = {}

for cls in class_names:
    tp = augmented_overall_metrics[cls]['TP']
    fp = augmented_overall_metrics[cls]['FP']
    fn = augmented_overall_metrics[cls]['FN']

    # Calculate precision, recall, and F1 score (0 when the denominator is 0)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    augmented_class_precision[cls] = precision
    augmented_class_recall[cls] = recall
    augmented_class_f1[cls] = f1

# Print metrics for augmented images
print(f"Processed {augmented_total_images} augmented test images")
print("\nClass-based metrics for augmented images:")
print("{:<15} {:<10} {:<10} {:<10}".format("Class", "Precision", "Recall", "F1 Score"))
for cls in class_names:
    print("{:<15} {:<10.2f} {:<10.2f} {:<10.2f}".format(cls, augmented_class_precision[cls], augmented_class_recall[cls], augmented_class_f1[cls]))

# Calculate macro-averaged metrics for augmented images
# (max(..., 1) avoids ZeroDivisionError when there are no classes)
augmented_class_count = max(len(class_names), 1)
augmented_macro_precision = sum(augmented_class_precision.values()) / augmented_class_count
augmented_macro_recall = sum(augmented_class_recall.values()) / augmented_class_count
augmented_macro_f1 = sum(augmented_class_f1.values()) / augmented_class_count

print("\nMacro-averaged metrics for augmented images:")
print(f"Precision: {augmented_macro_precision:.4f}")
print(f"Recall: {augmented_macro_recall:.4f}")
print(f"F1 Score: {augmented_macro_f1:.4f}")

In [None]:
# Compare results between original and augmented datasets
#
# The original-dataset numbers are computed in this cell so that it works after
# Restart & Run All: no earlier cell defines total_images, macro_recall,
# macro_f1 or class_f1, and the macro_precision left behind by the threshold
# sweep belongs to the last threshold tried, not to the 0.25 cut-off that the
# augmented images were scored with.
COMPARISON_CONF_THRESHOLD = 0.25


def _score_image_folder(images_path, labels_path, confidence_threshold):
    """Run the model over a folder and total the image-level TP/FP/FN per class.

    Uses the same procedure as the other evaluation cells: label files are
    read with read_label_file, detections above ``confidence_threshold`` are
    reduced to class names, and compute_class_accuracy compares the two.

    Returns:
        tuple: (number of images processed, {class: {'TP', 'FP', 'FN'}}).
    """
    totals = {name: {'TP': 0, 'FP': 0, 'FN': 0} for name in class_names}
    processed = 0
    image_paths = (glob.glob(os.path.join(images_path, '*.jpg')) +
                   glob.glob(os.path.join(images_path, '*.png')))

    for img_path in tqdm(image_paths, desc="Scoring original test images"):
        try:
            base_name = os.path.splitext(os.path.basename(img_path))[0]
            label_path = os.path.join(labels_path, base_name + '.txt')
            gt_classes = [obj['class_name'] for obj in read_label_file(label_path)]

            with Image.open(img_path) as img:
                prediction = model(img, conf=confidence_threshold, verbose=False)

            predicted_classes = []
            if len(prediction) > 0 and hasattr(prediction[0], 'boxes'):
                for box in prediction[0].boxes:
                    if float(box.conf) > confidence_threshold:
                        predicted_classes.append(prediction[0].names[int(box.cls)])

            image_metrics = compute_class_accuracy(predicted_classes, gt_classes)
            for cls in class_names:
                for counter in ('TP', 'FP', 'FN'):
                    totals[cls][counter] += image_metrics[cls][counter]
            processed += 1
        except Exception as e:
            print(f"Error processing {img_path}: {e}")

    return processed, totals


def _precision_recall_f1(counts):
    """Turn one class's TP/FP/FN counts into (precision, recall, F1); 0 when undefined."""
    tp, fp, fn = counts['TP'], counts['FP'], counts['FN']
    precision_value = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall_value = tp / (tp + fn) if (tp + fn) > 0 else 0
    denominator = precision_value + recall_value
    f1_value = 2 * (precision_value * recall_value) / denominator if denominator > 0 else 0
    return precision_value, recall_value, f1_value


# Metrics of the original test set, named to mirror the augmented_* variables
total_images, original_overall_metrics = _score_image_folder(
    test_data_path, test_labels_path, COMPARISON_CONF_THRESHOLD
)

class_precision = {}
class_recall = {}
class_f1 = {}
for cls in class_names:
    class_precision[cls], class_recall[cls], class_f1[cls] = _precision_recall_f1(original_overall_metrics[cls])

original_class_count = max(len(class_names), 1)  # avoid dividing by zero
macro_precision = sum(class_precision.values()) / original_class_count
macro_recall = sum(class_recall.values()) / original_class_count
macro_f1 = sum(class_f1.values()) / original_class_count

if total_images > 0 and augmented_total_images > 0:
    print("\nPerformance Comparison - Original vs. Augmented:")
    print("{:<15} {:<15} {:<15} {:<15}".format("Metric", "Original", "Augmented", "Difference"))

    # Compare precision
    precision_diff = augmented_macro_precision - macro_precision
    print("{:<15} {:<15.4f} {:<15.4f} {:<+15.4f}".format("Precision", macro_precision, augmented_macro_precision, precision_diff))

    # Compare recall
    recall_diff = augmented_macro_recall - macro_recall
    print("{:<15} {:<15.4f} {:<15.4f} {:<+15.4f}".format("Recall", macro_recall, augmented_macro_recall, recall_diff))

    # Compare F1 Score
    f1_diff = augmented_macro_f1 - macro_f1
    print("{:<15} {:<15.4f} {:<15.4f} {:<+15.4f}".format("F1 Score", macro_f1, augmented_macro_f1, f1_diff))

    # Print per-class comparisons
    print("\nPer-class F1 Score Comparison:")
    print("{:<15} {:<15} {:<15} {:<15}".format("Class", "Original", "Augmented", "Difference"))
    for cls in class_names:
        orig_f1 = class_f1[cls]
        aug_f1 = augmented_class_f1[cls]
        diff = aug_f1 - orig_f1
        print("{:<15} {:<15.4f} {:<15.4f} {:<+15.4f}".format(cls, orig_f1, aug_f1, diff))

    # Calculate overall improvement (relative change is undefined for a zero baseline)
    if macro_f1 > 0:
        percent_improvement = (f1_diff / macro_f1) * 100
        print(f"\nOverall F1 Score {'improvement' if f1_diff >= 0 else 'reduction'}: {abs(percent_improvement):.2f}%")
else:
    # Say why the comparison is missing rather than printing nothing
    print(f"Nothing to compare: {total_images} original and "
          f"{augmented_total_images} augmented images were processed.")

## Visualize Results

In [None]:
# One figure, one axes: grouped bars comparing per-class F1 on both datasets
fig, ax = plt.subplots(figsize=(12, 8))

bar_width = 0.35
positions = np.arange(len(class_names))  # one slot per class on the x axis

# F1 per class, in class_names order, for each dataset
f1_original = [class_f1[name] for name in class_names]
f1_augmented = [augmented_class_f1[name] for name in class_names]

# Shift each series half a bar width so the pair sits centred on its slot
ax.bar(positions - bar_width / 2, f1_original, bar_width, label='Original')
ax.bar(positions + bar_width / 2, f1_augmented, bar_width, label='Augmented')

ax.set_xlabel('Vehicle Classes')
ax.set_ylabel('F1 Score')
ax.set_title('F1 Score Comparison: Original vs. Augmented')
ax.set_xticks(positions)
ax.set_xticklabels(class_names, rotation=45)
ax.set_ylim(0, 1.0)
ax.legend()
fig.tight_layout()

plt.show()