In [1]:
import os
import torch
import cv2
from ultralytics import YOLO
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from PIL import Image
import pandas as pd
import csv

In [2]:
global_tp = 0
global_fp = 0
global_fn = 0

# model_path = '../datasets/shopping-trolley-5/runs/detect/pretrainedv8n/weights/bestpretrainedv8n.pt'
model_path = '../datasets/shopping-trolley-5/runs/detect/unpretrainedv8n/weights/bestpretrainedv8n.pt'
image_path = '../shopping-trolley-5/test/images'
label_path = '../shopping-trolley-5/test/labels'
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = YOLO(model_path)
model = model.to(device)

In [3]:
def predict_images(image_dir):
    predictions = []
    images = []
    filenames = []
    for filename in os.listdir(image_dir):
        if filename.endswith('.jpg'):
            path = os.path.join(image_dir, filename)
            image = cv2.imread(path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image_tensor = torch.from_numpy(image).float() / 255.0
            image_tensor = image_tensor.permute(2, 0, 1).unsqueeze(0)

            # print(image, image.shape)
            with torch.no_grad():
                detections = model(image_tensor, verbose=False)
                predictions.append(detections[0].boxes.xywh / 640)
            
            images.append(image_tensor)
            filenames.append(filename)

    return predictions, images, filenames


In [4]:
def print_ground(filename, label_path):
    # TO SAVE ALL GROUNDS INTO CSV FILE
    path = os.path.join(label_path, filename + '.txt')
    bounding_boxes = [] 
    with open(path, 'r') as file:
        lines = file.readlines()
        for line in lines:
            components = line.strip().split()
            if len(components) >= 5:
                x = float(components[1])
                y = float(components[2])
                w = float(components[3])
                h = float(components[4])
                bounding_boxes.append([x, y, w, h])

    return np.array(bounding_boxes)

def write_to_csv(filenames, predictions, label_path, csv_filename):
    with open(csv_filename, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Filename', 'Type', 'x', 'y', 'w', 'h'])

        for i in range(len(predictions)):
            filename_base = filenames[i][:-4]

            ground_truths = print_ground(filename_base, label_path)
            for ground in ground_truths:
                writer.writerow([filenames[i], 'Ground'] + ground.tolist())

            if len(predictions[i]) > 0:
                for prediction in predictions[i]:
                    writer.writerow([filenames[i], 'Predicted'] + prediction.tolist())
            else:
                writer.writerow([filenames[i], 'Predicted', '-1', '-1', '-1', '-1'])


In [5]:
predictions, image, filenames = predict_images(image_path)
write_to_csv(filenames, predictions, label_path, 'result.csv')
data = pd.read_csv('result.csv')

In [6]:
from collections import defaultdict

def calculate_iou(box1, box2):
    """Calculate the Intersection over Union (IoU) of two bounding boxes."""
    x_left = max(box1[0], box2[0])
    y_top = max(box1[1], box2[1])
    x_right = min(box1[2], box2[2])
    y_bottom = min(box1[3], box2[3])

    if x_right < x_left or y_bottom < y_top:
        return 0.0  # No overlap

    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = area1 + area2 - intersection_area
    iou = intersection_area / union_area
    return iou

def read_data(filename):
    """
    Read the CSV data into a structure organized by filename and type.
    """
    data = pd.read_csv(filename)
    grouped = defaultdict(lambda: {'Ground': [], 'Predicted': []})
    for _, row in data.iterrows():
        grouped[row['Filename']][row['Type']].append([row['x'], row['y'], row['w'], row['h']])
    return grouped


def match_predictions_to_ground_truths(ground_boxes, predicted_boxes, max_distance=50):
    global global_tp, global_fp, global_fn
    match_found = False
    """Match predictions to ground truths based on the center distance threshold."""
    matches = []
    used_predictions = set()

    for i, ground_box in enumerate(ground_boxes):
        ground_center_x = (ground_box[0] + ground_box[2]) / 2
        ground_center_y = (ground_box[1] + ground_box[3]) / 2
        best_distance = float('inf')
        best_pred_index = -1

        for j, predicted_box in enumerate(predicted_boxes):
            if j in used_predictions:
                continue

            pred_center_x = (predicted_box[0] + predicted_box[2]) / 2
            pred_center_y = (predicted_box[1] + predicted_box[3]) / 2

            dist_x = abs(pred_center_x - ground_center_x)
            dist_y = abs(pred_center_y - ground_center_y)

            if dist_x < max_distance and dist_y < max_distance:
                global_tp += 1
                match_found = True
                # Calculate Euclidean distance just for finding the closest box
                distance = (dist_x**2 + dist_y**2)**0.5
                if distance < best_distance:
                    best_distance = distance
                    best_pred_index = j

        if best_distance < float('inf') and best_pred_index != -1:
            matches.append((i, best_pred_index))
            used_predictions.add(best_pred_index)

        if not match_found:
            global_fn += 1

    for j in range(len(predicted_boxes)):
        if j not in used_predictions:
            global_fp += 1    

    return matches


def draw_bboxes_and_calculate_iou(image_path, grounds, predictions):
    image = Image.open(image_path)
    plt.figure(figsize=(12, 8))
    plt.imshow(image)
    ax = plt.gca()

    ground_boxes = []
    predicted_boxes = []

    for index, row in grounds.iterrows():
        x1 = (row['x'] - row['w'] / 2) * image.width
        y1 = (row['y'] - row['h'] / 2) * image.height
        x2 = (row['x'] + row['w'] / 2) * image.width
        y2 = (row['y'] + row['h'] / 2) * image.height
        ground_boxes.append((x1, y1, x2, y2))
        rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='red', facecolor='none')
        ax.add_patch(rect)

    for index, row in predictions.iterrows():
        x1 = (row['x'] - row['w'] / 2) * image.width
        y1 = (row['y'] - row['h'] / 2) * image.height
        x2 = (row['x'] + row['w'] / 2) * image.width
        y2 = (row['y'] + row['h'] / 2) * image.height
        predicted_boxes.append((x1, y1, x2, y2))
        rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, edgecolor='green', facecolor='none')
        ax.add_patch(rect)

    plt.axis("off")
    plt.show()

    matches = match_predictions_to_ground_truths(ground_boxes, predicted_boxes)
    for match in matches:
        ground_idx, pred_idx = match  # Adjusted to expect only two elements
        print(f"Match found between Ground Box {ground_idx} and Prediction Box {pred_idx}")


In [7]:
def convert_to_corners(x, y, w, h, image_width, image_height):
    x1 = (x - w / 2) * image_width
    y1 = (y - h / 2) * image_height
    x2 = (x + w / 2) * image_width
    y2 = (y + h / 2) * image_height
    return x1, y1, x2, y2

def calculate_image_iou(ground_boxes, predicted_boxes):
    iou_scores = []
    used_predictions = set()

    for i, ground_box in enumerate(ground_boxes):
        best_iou = 0
        best_pred_index = -1

        for j, predicted_box in enumerate(predicted_boxes):
            if j in used_predictions:
                continue
            iou = calculate_iou(ground_box, predicted_box)
            if iou > best_iou:
                best_iou = iou
                best_pred_index = j

        if best_iou > 0:  # Only consider positive IoUs to calculate mean
            iou_scores.append(best_iou)
            used_predictions.add(best_pred_index)

    return np.mean(iou_scores) if iou_scores else 0

def overall_dataset_iou(data):
    filenames = data['Filename'].unique()
    overall_iou = []

    image_width = 640 # TODO: Adjust!
    image_height = 640 # TODO: Adjust!

    for filename in filenames:
        ground_data = data[(data['Filename'] == filename) & (data['Type'] == 'Ground')]
        pred_data = data[(data['Filename'] == filename) & (data['Type'] == 'Predicted')]

        ground_boxes = [convert_to_corners(row['x'], row['y'], row['w'], row['h'], image_width, image_height) for index, row in ground_data.iterrows()]
        predicted_boxes = [convert_to_corners(row['x'], row['y'], row['w'], row['h'], image_width, image_height) for index, row in pred_data.iterrows()]

        file_iou = calculate_image_iou(ground_boxes, predicted_boxes)
        overall_iou.append(file_iou)

    return np.mean(overall_iou)

iou_result = overall_dataset_iou(data)
print(f"The overall mean IoU for the dataset is: {iou_result:.4f}")

The overall mean IoU for the dataset is: 0.8693


In [None]:
filenames = data['Filename'].unique()
for filename in filenames:
    grounds = data[(data['Filename'] == filename) & (data['Type'] == 'Ground')]
    predictions = data[(data['Filename'] == filename) & (data['Type'] == 'Predicted')]
    image_path = os.path.join('../shopping-trolley-5/test/images', filename)
    draw_bboxes_and_calculate_iou(image_path, grounds, predictions)


F1 score calculation

In [9]:
print(global_tp, global_fp, global_fn)
precision = global_tp / (global_tp + global_fp)
recall = global_tp / (global_tp + global_fn)
f1 = 2 * precision * recall / (precision + recall)
print(precision, recall, f1)

543 128 37
0.8092399403874814 0.9362068965517242 0.86810551558753
