In [2]:
import glob
import os
import random
import shutil
import xml.etree.ElementTree as ET

import fiftyone as fo
import fiftyone.zoo as foz
import numpy as np
import pandas as pd
import torch
from torchvision.io.image import read_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from ultralytics import YOLO

In [3]:
# Launch the FiftyOne App without passing a dataset and keep the session handle.
# NOTE(review): `session` is reassigned by a later `fo.launch_app(dataset_fo)`
# cell, so this first launch looks redundant — confirm it is still needed.
session = fo.launch_app()

In [2]:
path = "./fish-tracking-dataset/annotations.xml" 
# The annotation XML holds the labels for the dataset: it lists many image
# files, and each image can contain several fish bounding boxes.
# It is parsed into one record per image in the next cell.

In [3]:
# Parse every annotation file matched by `path` into one record per <image>
# element: its file name, its size, and the list of its <box> elements.

# glob returns matches in arbitrary order; sort so the row order is reproducible.
annotation_files = sorted(glob.glob(path))

# glob returns an empty list (no error) when nothing matches, which would
# otherwise silently produce an empty dataset and an empty DataFrame downstream.
if not annotation_files:
    raise FileNotFoundError(f"No annotation file matches {path!r}")

dataset = []

for anno in annotation_files:
    root = ET.parse(anno).getroot()

    for image_elem in root.iter("image"):
        image_attrs = image_elem.attrib
        dataset.append({
            "filename": image_attrs["name"],
            "width": float(image_attrs["width"]),
            "height": float(image_attrs["height"]),
            # One dict per <box>: corner coordinates are kept in pixels
            # exactly as stored in the XML attributes.
            "boxes": [
                {
                    "label": box_elem.attrib["label"],
                    "occluded": int(box_elem.attrib["occluded"]),
                    "xtl": float(box_elem.attrib["xtl"]),
                    "ytl": float(box_elem.attrib["ytl"]),
                    "xbr": float(box_elem.attrib["xbr"]),
                    "ybr": float(box_elem.attrib["ybr"]),
                }
                for box_elem in image_elem.iter("box")
            ],
        })

In [4]:
# Tabulate the parsed records: one row per image, with the "boxes" column
# holding that image's list of per-box dicts.
data0=pd.DataFrame(dataset)
display(data0)

Unnamed: 0,filename,width,height,boxes
0,images/01.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 0, 'xtl': 115.0..."
1,images/02.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 0, 'xtl': 43.47..."
2,images/03.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 1, 'xtl': 0.0, ..."
3,images/04.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 1, 'xtl': 0.0, ..."
4,images/05.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 1, 'xtl': 400.9..."
...,...,...,...,...
94,images/95.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 0, 'xtl': 1017...."
95,images/96.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 0, 'xtl': 996.4..."
96,images/97.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 0, 'xtl': 983.9..."
97,images/98.jpg,1752.0,986.0,"[{'label': 'fish', 'occluded': 0, 'xtl': 972.4..."


In [5]:
# Inspect the parsed boxes of the row labelled 0 (the first image).
data0["boxes"][0]

[{'label': 'fish',
  'occluded': 0,
  'xtl': 115.06,
  'ytl': 284.8,
  'xbr': 249.0,
  'ybr': 394.0},
 {'label': 'fish',
  'occluded': 1,
  'xtl': 581.88,
  'ytl': 289.74,
  'xbr': 683.72,
  'ybr': 393.12},
 {'label': 'fish',
  'occluded': 0,
  'xtl': 583.38,
  'ytl': 478.65,
  'xbr': 769.57,
  'ybr': 599.53},
 {'label': 'fish',
  'occluded': 0,
  'xtl': 1014.1,
  'ytl': 500.26,
  'xbr': 1106.98,
  'ybr': 625.76},
 {'label': 'fish',
  'occluded': 0,
  'xtl': 203.38,
  'ytl': 610.53,
  'xbr': 314.48,
  'ybr': 778.21},
 {'label': 'fish',
  'occluded': 0,
  'xtl': 375.68,
  'ytl': 766.89,
  'xbr': 510.44,
  'ybr': 836.84},
 {'label': 'fish',
  'occluded': 0,
  'xtl': 1130.1,
  'ytl': 184.81,
  'xbr': 1257.22,
  'ybr': 270.53},
 {'label': 'fish',
  'occluded': 1,
  'xtl': 1303.47,
  'ytl': 332.17,
  'xbr': 1362.19,
  'ybr': 390.46},
 {'label': 'fish',
  'occluded': 0,
  'xtl': 860.56,
  'ytl': 406.65,
  'xbr': 965.02,
  'ybr': 536.78}]

In [6]:
def normalize_bbox(xtl, ytl, xbr, ybr, img_width, img_height):
    """
    Express a corner-format pixel box as fractions of the image size.

    The box is given by its top-left and bottom-right corners; the result
    is the top-left corner plus the box extent, each divided by the
    matching image dimension.

    Parameters:
    xtl (float): x of the top-left corner, in pixels
    ytl (float): y of the top-left corner, in pixels
    xbr (float): x of the bottom-right corner, in pixels
    ybr (float): y of the bottom-right corner, in pixels
    img_width (float): image width, in pixels
    img_height (float): image height, in pixels

    Returns:
    tuple: (x, y, width, height), x/width relative to img_width and
    y/height relative to img_height
    """
    # Horizontal quantities scale with the image width ...
    rel_x = xtl / img_width
    rel_w = (xbr - xtl) / img_width

    # ... vertical quantities with the image height.
    rel_y = ytl / img_height
    rel_h = (ybr - ytl) / img_height

    return rel_x, rel_y, rel_w, rel_h


In [7]:
# List the contents of the dataset directory (shell command, relative to the notebook).
!ls fish-tracking-dataset

annotations.xml  boxes	fish.csv  images


In [8]:
# Create the FiftyOne dataset that will hold the fish images.
# overwrite=True replaces any existing dataset with this name, so the cell can
# be re-run in the same kernel instead of failing because the name is taken.
dataset_fo = fo.Dataset("new_fish_dataset", overwrite=True)

In [9]:
# Register one FiftyOne sample per annotated image. The file names from the
# XML are relative to the dataset directory, so that directory is prefixed.
for filename in data0["filename"]:
    image_path = "fish-tracking-dataset/" + filename
    sample = fo.Sample(image_path)
    dataset_fo.add_sample(sample)


In [10]:
# Print the dataset summary (name, sample count, field schema) to confirm the samples were added.
print(dataset_fo)

Name:        new_fish_dataset
Media type:  image
Num samples: 99
Persistent:  False
Tags:        []
Sample fields:
    id:       fiftyone.core.fields.ObjectIdField
    filepath: fiftyone.core.fields.StringField
    tags:     fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)


In [1]:
# Launch the FiftyOne App on the fish dataset and keep the session handle.
# NOTE(review): the recorded output of this cell is a NameError ("name 'fo' is
# not defined") with execution count 1, i.e. it was last run on a fresh kernel
# before the import cell. Re-run the notebook top to bottom to refresh it.
session = fo.launch_app(dataset_fo)

NameError: name 'fo' is not defined

In [12]:
!ls /home/tylershaughnessy/Documents/cphack/fish-tracking-datasetimages/01.jpg

ls: cannot access '/home/tylershaughnessy/Documents/cphack/fish-tracking-datasetimages/01.jpg': No such file or directory


In [13]:
# Load the pretrained Ultralytics YOLO checkpoint "yolov8s.pt".
# NOTE(review): `model` is rebound to a torchvision Faster R-CNN in a later
# cell; a distinct name would make the two detectors easier to tell apart.
model = YOLO("yolov8s.pt")

In [14]:
# Run the YOLO model over every sample in the dataset and store its
# predictions on each sample under the "YOLOv8" field.
dataset_fo.apply_model(model, label_field="YOLOv8")



 100% |███████████████████| 99/99 [21.8s elapsed, 0s remaining, 4.7 samples/s]      


In [15]:
def convert_xyxy_boxes(sample, boxes):
    """
    Convert absolute corner boxes into relative [x, y, width, height] lists.

    Parameters:
    sample: object exposing ``metadata.width`` and ``metadata.height``
        (the image size used for normalisation)
    boxes: iterable of indexable boxes laid out as
        (x_min, y_min, x_max, y_max) in pixels

    Returns:
    list: one ``[x, y, width, height]`` list per input box, with x/width
    divided by the image width and y/height by the image height
    """
    return [
        [
            # Top-left corner, scaled by the image size
            corners[0] / sample.metadata.width,
            corners[1] / sample.metadata.height,
            # Box extent (max - min), scaled the same way
            (corners[2] - corners[0]) / sample.metadata.width,
            (corners[3] - corners[1]) / sample.metadata.height,
        ]
        for corners in boxes
    ]

In [16]:
# Run a pretrained torchvision Faster R-CNN over every sample and store its
# detections on the sample under the "torchvision" field.
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)

# One-time inference setup (previously repeated for every image):
# switch to eval mode and build the preprocessing transform once.
model.eval()
preprocess = weights.transforms()
categories = weights.meta["categories"]

# Compute metadata so each sample exposes its image width and height,
# which convert_xyxy_boxes needs to normalize the predicted boxes.
dataset_fo.compute_metadata()

for sample in dataset_fo:

    # Step 1: Load the image
    image = read_image(sample.filepath)

    # Step 2: Perform preprocessing
    batch = [preprocess(image)]

    # Step 3: Inference on the image. no_grad skips autograd bookkeeping,
    # which is not needed for prediction and only costs memory and time.
    with torch.no_grad():
        prediction = model(batch)[0]

    # Step 4: Grab the predicted labels, confidences and boxes
    labels = [categories[i] for i in prediction["labels"].tolist()]
    confs = prediction["scores"].tolist()
    fo_boxes = convert_xyxy_boxes(sample, prediction["boxes"].tolist())

    # Step 5: Wrap each prediction as a FiftyOne detection and save it
    detections = [
        fo.Detection(label=cls, bounding_box=box, confidence=conf)
        for cls, box, conf in zip(labels, fo_boxes, confs)
    ]

    sample["torchvision"] = fo.Detections(detections=detections)
    sample.save()

Computing metadata...
 100% |███████████████████| 99/99 [44.3ms elapsed, 0s remaining, 2.2K samples/s] 


In [17]:
# Show the FiftyOne App session so the stored predictions can be inspected.
session.show()