In [47]:
# import modules
import os # file 
import shutil
import cv2 # opencv for images
import random
import numpy as np
import pandas as pd
import torch
from ultralytics import YOLO #for obeject detection
import albumentations as A # for image augmentation
from albumentations.pytorch import ToTensorV2 # for image formating
# from tqdm import tqdm  # to show processing progress
# Suppress all warnings
import warnings
warnings.filterwarnings("ignore")



In [48]:
# create direcories to organize images and cleanup for a new to avoid duplicate images 
def reset_directories(directories):
    """
    Check if the specified directories exist. If they do, delete them and recreate them.
    Ensures the directories are clean before use.

    Parameters:
        directories (list): List of directories to reset.
    """
    for dir_path in directories:
        if os.path.exists(dir_path):
            # delete the directory and all its contents
            try:
                shutil.rmtree(dir_path)
                print(f"Deleted existing directory: {dir_path}")
            except Exception as e:
                print(f"Failed to delete {dir_path}. Reason: {e}")
        
        # Recreate the directory
        try:
            os.makedirs(dir_path, exist_ok=True)
            print(f"Recreated directory: {dir_path}")
        except Exception as e:
            print(f"Failed to create directory {dir_path}. Reason: {e}")

# Define directories to reset
directories_to_reset = [
    "datasets/images/train",
    "datasets/images/valid",
    "datasets/images/test",
    "datasets/labels/train",
    "datasets/labels/valid",
    "datasets/labels/test"
]

# Reset directories
reset_directories(directories_to_reset)

print("Directories reset and ready for use.")

Deleted existing directory: datasets/images/train
Recreated directory: datasets/images/train
Deleted existing directory: datasets/images/valid
Recreated directory: datasets/images/valid
Deleted existing directory: datasets/images/test
Recreated directory: datasets/images/test
Deleted existing directory: datasets/labels/train
Recreated directory: datasets/labels/train
Deleted existing directory: datasets/labels/valid
Recreated directory: datasets/labels/valid
Deleted existing directory: datasets/labels/test
Recreated directory: datasets/labels/test
Directories reset and ready for use.


In [49]:
# Set random seed for reproducibility
random.seed(42)

# source directory containing all raw images
source_image_dir = "datasets/raw_images"

# organize dataset directories
base_dir = "datasets"
image_dirs = {
    "train": os.path.join(base_dir, "images/train"),
    "valid": os.path.join(base_dir, "images/valid"),
    "test": os.path.join(base_dir, "images/test")
}
label_dirs = {
    "train": os.path.join(base_dir, "labels/train"),
    "valid": os.path.join(base_dir, "labels/valid"),
    "test": os.path.join(base_dir, "labels/test")
}

# extract the image files
image_files = [f for f in os.listdir(source_image_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]
random.shuffle(image_files)

# Create DataFrame with file paths and dataset split assignments
df = pd.DataFrame({"filename": image_files})
# dynamically split dataset into train, valid, and test
# split sizes
train_size = int(0.7 * len(df))  # 70% for training
valid_size = int(0.2 * len(df))  # 20% for validation
test_size = len(df) - train_size - valid_size  # remaining 10% for testing

# split labels
train_labels = ["train"] * train_size
valid_labels = ["valid"] * valid_size
test_labels = ["test"] * test_size

# combine and assign to DataFrame
df["split"] = train_labels + valid_labels + test_labels
df.head()



Unnamed: 0,filename,split
0,IMG_0871_MOV-39_jpg.rf.41a6a9b70c2a41fa45e3960...,train
1,004720_jpg.rf.afc486560a4004c7cfd67910af31a29c...,train
2,IMG_0871_mp4-9_jpg.rf.2f7c21e75f95f0f1b1803a70...,train
3,construction-872-_jpg.rf.3403cad2f0566950b2322...,train
4,n457047_jpg.rf.254d1968e99ec5d7bc5e946c03f0c5a...,train


In [None]:
# load YOLOv8 pre-trained model
model = YOLO("yolov8n.pt")

# define augmentation pipeline
augmentation = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Normalize(mean=(0, 0, 0), std=(1, 1, 1)),  # No change to pixel values
    ToTensorV2()  # convert to PyTorch tensor 
], bbox_params=A.BboxParams(format='yolo', label_fields=['labels']))

# define function to annotate and process images
def annotate_images(df, split, image_dirs, label_dirs, confidence_threshold=0.5):
    data = []
    split_df = df[df["split"] == split]
    for _, row in split_df.iterrows():
        image_file = row["filename"]
        # load the image
        image_path = os.path.join(source_image_dir, image_file)
        image = cv2.imread(image_path)

        # perform inference using YOLOv8
        results = model(image_path)

        # extract bounding boxes and labels
        bboxes = []
        labels = []
        for result in results[0].boxes:
            box = result.xywhn[0].cpu().numpy()  # Normalized x_center, y_center, width, height
            class_id = int(result.cls[0].cpu().numpy())
            confidence = float(result.conf[0].cpu().numpy())

            # filter by confidence threshold
            if confidence >= confidence_threshold:
                bboxes.append(box.tolist())
                labels.append(class_id)

                # add annotation details to the df list
                data.append({
                    "filename": image_file,
                    "split": split,
                    "class_id": class_id,
                    "confidence": confidence,
                    "x_center": box[0],
                    "y_center": box[1],
                    "width": box[2],
                    "height": box[3],
                })

        # apply augmentation
        if bboxes:  # Only augment if there are bounding boxes
            augmented = augmentation(image=image, bboxes=bboxes, labels=labels)
            image = augmented["image"]
            bboxes = augmented["bboxes"]
            labels = augmented["labels"]

        # convert to numpy format for saving 
        if isinstance(image, torch.Tensor):  # If tensor, convert to numpy for opencv
            image = image.permute(1, 2, 0).cpu().numpy() # changes the order of the tensor dimensions from (C, H, W) (Channel-Height-Width, common in PyTorch) to (H, W, C) (Height-Width-Channel, required by OpenCV and most image libraries).
            image = (image * 255).astype(np.uint8)  # Convert to uint8 for OpenCV

        # save the image to the appropriate directory
        output_image_path = os.path.join(image_dirs[split], image_file)
        cv2.imwrite(output_image_path, image)

        # set YOLO format labels
        label_file = os.path.splitext(image_file)[0] + ".txt"
        label_path = os.path.join(label_dirs[split], label_file)

        # create YOLO format label for the corresponding image
        with open(label_path, "w") as f:
            for bbox, class_id in zip(bboxes, labels):
                # Write each valid detection to the file in YOLO format
                f.write(f"{class_id} {bbox[0]:.6f} {bbox[1]:.6f} {bbox[2]:.6f} {bbox[3]:.6f}\n")

    return data
# annotate and process images for all splits
annotation_data = []
for split in ["train", "valid", "test"]:
    # append each list to annotation data
    annotation_data.extend(annotate_images(df, split, image_dirs, label_dirs, confidence_threshold=0.5))


print("Annotation completed and dataset organized into train, validation, and test directories.")


image 1/1 /Users/fatimatatanda/Library/CloudStorage/OneDrive-Personal/Desktop/USD/Projects/aai-501-final-project/datasets/raw_images/IMG_0871_MOV-39_jpg.rf.41a6a9b70c2a41fa45e39600600ce0ed.jpg: 640x640 1 bus, 1 truck, 54.4ms
Speed: 0.7ms preprocess, 54.4ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /Users/fatimatatanda/Library/CloudStorage/OneDrive-Personal/Desktop/USD/Projects/aai-501-final-project/datasets/raw_images/004720_jpg.rf.afc486560a4004c7cfd67910af31a29c.jpg: 640x640 1 person, 4 cars, 1 cow, 56.1ms
Speed: 0.7ms preprocess, 56.1ms inference, 0.7ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /Users/fatimatatanda/Library/CloudStorage/OneDrive-Personal/Desktop/USD/Projects/aai-501-final-project/datasets/raw_images/IMG_0871_mp4-9_jpg.rf.2f7c21e75f95f0f1b1803a708e06f342.jpg: 640x640 3 persons, 73.4ms
Speed: 0.7ms preprocess, 73.4ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /Users/fatimatatanda/Libra

In [51]:
annotation_df = pd.DataFrame(annotation_data)
annotation_df.head()

Unnamed: 0,filename,split,class_id,confidence,x_center,y_center,width,height
0,IMG_0871_MOV-39_jpg.rf.41a6a9b70c2a41fa45e3960...,train,7,0.734379,0.523683,0.663333,0.118504,0.105876
1,004720_jpg.rf.afc486560a4004c7cfd67910af31a29c...,train,0,0.743985,0.204548,0.387022,0.275569,0.277825
2,004720_jpg.rf.afc486560a4004c7cfd67910af31a29c...,train,2,0.689993,0.369293,0.614387,0.054831,0.048369
3,004720_jpg.rf.afc486560a4004c7cfd67910af31a29c...,train,2,0.58749,0.519868,0.616981,0.059734,0.045449
4,004720_jpg.rf.afc486560a4004c7cfd67910af31a29c...,train,2,0.581372,0.617744,0.605309,0.067972,0.064742
