In [7]:
import os
import torch
import cv2
from ultralytics import YOLO
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

In [8]:
# Locations of the trained detector weights and of the held-out test split.
# NOTE(review): the weights are read from under ../datasets/ while the test
# images/labels are read from ../shopping-trolley-5 — confirm both are intended.
model_path = '../datasets/shopping-trolley-5/runs/detect/pretrainedv8n/weights/bestpretrainedv8n.pt'
image_path = '../shopping-trolley-5/test/images'
label_path = '../shopping-trolley-5/test/labels'

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the weights and place the model on the selected device in one step.
model = YOLO(model_path).to(device)
# model.eval()

In [9]:
def predict_images(image_dir):
    """Run the module-level ``model`` on every ``.jpg`` file in ``image_dir``.

    Files are processed in sorted name order so repeated runs produce the same
    ordering (``os.listdir`` gives no ordering guarantee). Files that OpenCV
    cannot decode are reported and skipped instead of crashing the whole run.

    Args:
        image_dir: Directory containing the images to run detection on.

    Returns:
        A tuple ``(predictions, images, filenames)`` of three parallel lists:
        ``predictions[i]`` holds the ``boxes.xywh`` tensor of the first result
        for image ``i``, with x/width divided by the image width and y/height
        divided by the image height; ``images[i]`` is the ``(1, 3, H, W)``
        float tensor (values scaled by 1/255) that was passed to the model;
        ``filenames[i]`` is the file name including its extension.
    """
    predictions = []
    images = []
    filenames = []
    for filename in sorted(os.listdir(image_dir)):
        if not filename.endswith('.jpg'):
            continue

        path = os.path.join(image_dir, filename)
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print(f'Skipping unreadable image: {path}')
            continue

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        height, width = image.shape[:2]
        image_tensor = torch.from_numpy(image).float() / 255.0
        # HWC -> CHW, then add a leading batch dimension.
        image_tensor = image_tensor.permute(2, 0, 1).unsqueeze(0)

        with torch.no_grad():
            detections = model(image_tensor, verbose=False)

        # Normalise by this image's own size rather than a fixed 640 so the
        # result stays in [0, 1] for any resolution (identical for 640x640).
        boxes_xywh = detections[0].boxes.xywh
        scale = boxes_xywh.new_tensor([width, height, width, height])
        predictions.append(boxes_xywh / scale)

        images.append(image_tensor)
        filenames.append(filename)

    return predictions, images, filenames
            
def load_labels(label_dir):
    """Read every ``.txt`` label file found directly inside ``label_dir``.

    Args:
        label_dir: Directory containing one ``.txt`` file per image.

    Returns:
        A dict mapping each file name without its ``.txt`` suffix to the list
        of raw lines in that file (as returned by ``readlines``, so trailing
        newlines are kept). Empty when the directory has no ``.txt`` files.
    """
    # Must be a dict: entries are keyed by the file stem, and indexing a list
    # with a string raises TypeError.
    labels = {}
    for filename in sorted(os.listdir(label_dir)):
        if filename.endswith('.txt'):
            path = os.path.join(label_dir, filename)
            with open(path, 'r') as file:
                labels[filename[:-4]] = file.readlines()
    return labels

def compute_metrics(predictions, true_labels):
    """Placeholder for the evaluation metrics; not implemented yet.

    Both arguments are currently ignored and the function always returns 0.
    TODO: implement the actual comparison of predictions against labels.
    """
    return 0

In [10]:
def bboxvisualization(img_tensor, edgecolor=None, labels_ground=None, labels_xyxy=None, transforms=None):
    """Display an image with a single bounding box drawn on top of it.

    Args:
        img_tensor: Image as a torch tensor. Channels-last ``(H, W, C)`` is
            shown as-is; ``(C, H, W)`` and ``(1, C, H, W)`` are rearranged to
            channels-last first; a 2D ``(H, W)`` tensor is shown directly.
        edgecolor: Matplotlib colour for the rectangle outline; red when
            omitted.
        labels_ground: Optional 5-element sequence ``(class, cx, cy, w, h)``
            with the box given as fractions of the image size. Takes
            precedence over ``labels_xyxy`` when both are given.
        labels_xyxy: Optional tensor of four pixel coordinates
            ``(x1, y1, x2, y2)``.
        transforms: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If neither ``labels_ground`` nor ``labels_xyxy`` is given.
    """
    if labels_ground is None and labels_xyxy is None:
        raise ValueError('Either labels_ground or labels_xyxy must be provided')

    # Bring the image into the (H, W, C) layout that imshow expects.
    img_display = img_tensor
    if img_display.dim() == 4 and img_display.shape[0] == 1:
        img_display = img_display[0]  # drop the batch dimension
    channel_counts = (1, 3, 4)
    if (img_display.dim() == 3
            and img_display.shape[2] not in channel_counts
            and img_display.shape[0] in channel_counts):
        img_display = img_display.permute(1, 2, 0)  # CHW -> HWC
    height, width = img_display.shape[0], img_display.shape[1]

    if labels_ground is not None:
        _, cx, cy, w, h = labels_ground
        # x spans the image width and y spans the image height.
        x1 = (cx - w / 2) * width
        y1 = (cy - h / 2) * height
        x2 = (cx + w / 2) * width
        y2 = (cy + h / 2) * height
    else:
        x1, y1, x2, y2 = labels_xyxy.cpu().numpy()

    if img_display.dim() == 3 and img_display.shape[2] == 1:
        # Single-channel image: replicate it so imshow renders it as RGB.
        img_display = img_display.repeat(1, 1, 3)

    fig, ax = plt.subplots(1)
    ax.imshow(img_display.cpu().numpy())
    rect = patches.Rectangle(
        (x1, y1),
        x2 - x1,
        y2 - y1,
        linewidth=2,
        edgecolor=edgecolor if edgecolor else 'r',
        facecolor='none',
    )
    ax.add_patch(rect)
    plt.tight_layout()
    plt.axis("off")
    plt.show()

In [12]:
def print_groundtruth(filename, label_path):
    """Print every annotation row of one label file as ``label x y w h``.

    Args:
        filename: Label file name without the ``.txt`` extension.
        label_path: Directory that contains the label file.

    Each non-empty line is split on whitespace; the first field is printed as
    an int and the next four as floats. Blank lines are skipped.
    """
    path = os.path.join(label_path, filename + '.txt')
    with open(path, 'r') as file:
        for line in file:
            # split() without arguments tolerates repeated spaces and the
            # trailing newline; split(' ') would yield '' or '\n' fields.
            fields = line.split()
            if not fields:
                continue
            label = int(fields[0])
            x = float(fields[1])
            y = float(fields[2])
            w = float(fields[3])
            h = float(fields[4])
            print(label, x, y, w, h)

In [16]:
# Run the detector over the whole test split.
predictions, image, filename = predict_images(image_path)
# labels = load_labels(label_path)

# For each test image print its name, its ground-truth rows and the predicted
# boxes, separated by blank lines.
for i, name in enumerate(filename):
    print(name, end='\n\n')
    print_groundtruth(name[:-4], label_path)  # strip the '.jpg' suffix
    print()
    print(predictions[i])
    print('\n\n')
# print()
# print(predictions[-1])  
# print(len(predictions))
# bboxvisualization(image, labels_xyxy = predictions[0][0])

00826_jpg.rf.2e2e220f59b934cae4a574b8c7a43e7a.jpg

1 0.5921875 0.7390625 0.1921875 0.40703125

tensor([[0.6209, 0.7625, 0.1509, 0.4489],
        [0.1033, 0.8686, 0.2061, 0.2629],
        [0.1553, 0.8645, 0.1351, 0.2703],
        [0.7467, 0.9071, 0.3923, 0.1859],
        [0.8002, 0.8998, 0.2886, 0.2001],
        [0.0783, 0.8988, 0.1548, 0.2024]], device='cuda:0')



01RRATHAEXT4_jpg.rf.7b50b7fa5d0b676121e99bf237f35dd8.jpg

1 0.47265625 0.66015625 0.5421875 0.5875

tensor([[0.4698, 0.6682, 0.5301, 0.5669]], device='cuda:0')



0463_jpg.rf.eca07f4f2da1e58cf3a4fa1941737121.jpg

1 0.5546875 0.5984375 0.153125 0.46328125
1 0.15 0.79921875 0.28671875 0.4015625
1 0.74140625 0.5578125 0.109375 0.2484375

tensor([[0.1481, 0.8010, 0.2960, 0.3980],
        [0.5484, 0.6170, 0.1480, 0.4122],
        [0.7387, 0.5558, 0.1018, 0.2530]], device='cuda:0')



05DDU4JUP6VQ_jpg.rf.4e4bcedf30f8f0fe7b266fff0c736600.jpg

0 0.5140625 0.6421875 0.453125 0.45390625

tensor([[0.5767, 0.6337, 0.2821, 0.4613],
     