# Scripts used to gather, preprocess and manipulate the datasets

In [None]:
import os
import random
import shutil

import cv2
import numpy as np
import tensorflow as tf

In [None]:


# Interactive webcam capture: shows a live preview window, saves the most
# recently read frame as a PNG each time 's' is pressed, and stops on ESC or
# when the preview window is closed.

output_dir = "images_X2"
# cv2.imwrite does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

cv2.namedWindow("preview")
vc = cv2.VideoCapture(0)

try:
    if vc.isOpened():
        rval, frame = vc.read()
    else:
        rval = False

    image_counter = 1

    while rval:
        cv2.imshow("preview", frame)
        rval, frame = vc.read()
        # Keep only the low byte: some backends set extra high bits on the key code.
        key = cv2.waitKey(20) & 0xFF

        # The user closed the preview window.
        if cv2.getWindowProperty("preview", cv2.WND_PROP_VISIBLE) < 1:
            break

        # Only save when the latest read succeeded; otherwise there is no valid frame.
        if key == ord('s') and rval:
            image_filename = f"{output_dir}/image_{image_counter}.png"
            # Skip numbers already used so an earlier session is not overwritten.
            while os.path.exists(image_filename):
                image_counter += 1
                image_filename = f"{output_dir}/image_{image_counter}.png"

            if cv2.imwrite(image_filename, frame):
                print(f"Saved {image_filename}")
                image_counter += 1
            else:
                print(f"Unable to save {image_filename}")

        if key == 27:  # Exit on ESC
            break
finally:
    # Always free the camera, even if an error interrupted the loop.
    vc.release()
    # destroyAllWindows is used because the preview may already have been
    # closed by the user at this point.
    cv2.destroyAllWindows()


### Captures pictures by pressing S and names each one according to a defined pattern

In [None]:

source_directory = 'images_X2'


def resize_images(src_dir, size=(640, 640)):
    """Resize every readable image directly inside ``src_dir``, in place.

    Each file is loaded with OpenCV, resized to ``size`` and written back to
    the same path, replacing the original. Sub-directories are skipped, and
    files OpenCV cannot decode are reported and left untouched. The aspect
    ratio is not preserved: images are stretched to exactly ``size``.

    Args:
        src_dir (str): Directory whose files should be resized.
        size (tuple[int, int]): Target ``(width, height)`` in pixels
            (default ``(640, 640)``).
    """
    for file_name in os.listdir(src_dir):
        file_path = os.path.join(src_dir, file_name)

        if os.path.isdir(file_path):
            continue

        image = cv2.imread(file_path)
        if image is None:
            print(f"Unable to read image {file_path}. Skipping.")
            continue

        resized_image = cv2.resize(image, size, interpolation=cv2.INTER_AREA)

        # imwrite reports failure through its return value rather than raising.
        if not cv2.imwrite(file_path, resized_image):
            print(f"Unable to write image {file_path}. Skipping.")
            continue

        print(f"Resized and saved {file_path}")


resize_images(source_directory)


### Resizes every image in a directory to 640x640 pixels, overwriting the original files

In [None]:

def rename_files_across_directories(directories, new_name_pattern, start_index=0):
    """Rename the files of several directories using one shared counter.

    Directories are processed in the given order and, within each directory,
    regular files are processed in sorted name order. Every file is renamed
    (inside its own directory) to ``new_name_pattern.format(n)``, where ``n``
    starts at ``start_index + 1`` and keeps increasing across directories.
    Missing directories are reported and skipped; sub-directories are ignored.

    Renaming happens in two phases (first to temporary names, then to the
    final names) so that a file which already carries one of the target names
    is never overwritten by another file's rename.

    Args:
        directories (list[str]): Directories whose files should be renamed.
        new_name_pattern (str): ``str.format`` pattern receiving the running
            number, e.g. ``'image_{:03d}.jpg'``.
        start_index (int): Counter value before the first file; the first
            file is numbered ``start_index + 1`` (default 0).

    Raises:
        ValueError: If the pattern yields the same name for two files of one
            directory (e.g. it has no placeholder).
        FileExistsError: If a target name is already taken by an entry that
            is not being renamed (such as a sub-directory).
    """
    current_index = start_index

    for directory_path in directories:
        if not os.path.isdir(directory_path):
            print(f"Directory not found: {directory_path}")
            continue

        entries = os.listdir(directory_path)
        files = sorted(
            f for f in entries if os.path.isfile(os.path.join(directory_path, f))
        )
        if not files:
            continue

        new_names = [
            new_name_pattern.format(current_index + offset + 1)
            for offset in range(len(files))
        ]

        # Validate the whole plan before touching anything, so a bad pattern
        # cannot leave the directory half-renamed.
        if len(set(new_names)) != len(new_names):
            raise ValueError(
                f"Pattern {new_name_pattern!r} does not produce unique names"
            )
        blocked = set(new_names) & (set(entries) - set(files))
        if blocked:
            raise FileExistsError(
                f"Target names already in use in {directory_path}: {sorted(blocked)}"
            )

        # Pick a temporary prefix that no existing entry uses.
        temp_prefix = "__renaming__"
        while any(entry.startswith(temp_prefix) for entry in entries):
            temp_prefix += "_"
        temp_names = [f"{temp_prefix}{position}" for position in range(len(files))]

        # Phase 1: park every file under a temporary name.
        for filename, temp_name in zip(files, temp_names):
            os.rename(
                os.path.join(directory_path, filename),
                os.path.join(directory_path, temp_name),
            )

        # Phase 2: all original names are free now, so assign the final ones.
        for filename, temp_name, new_filename in zip(files, temp_names, new_names):
            os.rename(
                os.path.join(directory_path, temp_name),
                os.path.join(directory_path, new_filename),
            )
            print(f"Renamed '{filename}' to '{new_filename}'")

        current_index += len(files)

# Folders whose files are renamed with one shared numbering sequence.
directories = [
    'train_yasmin',
    'images_X',
    'images_X2',
    'images_O',
]
new_name_pattern = 'image_{:03d}.jpg'
# NOTE: the function numbers files from start_index + 1, so with this value
# the first renamed file becomes image_002.jpg.
start_index = 1

rename_files_across_directories(
    directories=directories,
    new_name_pattern=new_name_pattern,
    start_index=start_index,
)


# When several collaborators contribute images, their file names can conflict; to resolve this, we rename the images across multiple directories using one shared numbering sequence.

In [None]:


def split_dataset(image_dir, label_dir, train_img_dir, train_label_dir, val_img_dir, val_label_dir, train_ratio=0.9, seed=None):
    """
    Splits the dataset into training and validation sets, and moves unmatched images/labels into the validation set.

    Images and labels are paired by file name without extension. Matched pairs
    are shuffled and the first ``int(len(pairs) * train_ratio)`` pairs are moved
    to the training directories; the remaining pairs go to the validation
    directories. Files are moved, not copied, so the source directories are
    emptied of every file that was tracked.

    If several files in one source directory share the same base name, only
    the last one in sorted order is tracked; the others are left in place.

    Args:
    - image_dir (str): Directory containing the images.
    - label_dir (str): Directory containing the labels.
    - train_img_dir (str): Directory to store the training images.
    - train_label_dir (str): Directory to store the training labels.
    - val_img_dir (str): Directory to store the validation images.
    - val_label_dir (str): Directory to store the validation labels.
    - train_ratio (float): Ratio of the dataset to be used for training (default 0.9).
    - seed (int | None): Seed for a reproducible shuffle. When None (default),
      the module-level ``random`` generator is used.
    """
    # Create the destination directories if they don't exist
    for directory in (train_img_dir, train_label_dir, val_img_dir, val_label_dir):
        os.makedirs(directory, exist_ok=True)

    # List all image and label files
    image_files = sorted(f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f)))
    label_files = sorted(f for f in os.listdir(label_dir) if os.path.isfile(os.path.join(label_dir, f)))

    # Map base filenames (without extensions) to full filenames for matching
    image_basenames = {os.path.splitext(f)[0]: f for f in image_files}
    label_basenames = {os.path.splitext(f)[0]: f for f in label_files}

    # Sort the name sets: set iteration order depends on string hashing, which
    # would make the split differ between runs even with a fixed seed.
    matched_names = sorted(image_basenames.keys() & label_basenames.keys())
    unmatched_images = sorted(image_basenames.keys() - label_basenames.keys())
    unmatched_labels = sorted(label_basenames.keys() - image_basenames.keys())

    # Combine matched images and labels into a list for splitting
    matched_list = [(image_basenames[name], label_basenames[name]) for name in matched_names]

    # Shuffle the files to randomize the split
    if seed is None:
        random.shuffle(matched_list)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(matched_list)

    # Calculate the split index for the training set
    split_index = int(len(matched_list) * train_ratio)

    # Move matched files to the appropriate directories
    for i, (img_file, label_file) in enumerate(matched_list):
        in_train = i < split_index
        if in_train:
            img_dest_dir, label_dest_dir = train_img_dir, train_label_dir
        else:
            img_dest_dir, label_dest_dir = val_img_dir, val_label_dir

        shutil.move(os.path.join(image_dir, img_file), os.path.join(img_dest_dir, img_file))
        shutil.move(os.path.join(label_dir, label_file), os.path.join(label_dest_dir, label_file))
        print(f"Moved matched: {img_file} and {label_file} to {'train' if in_train else 'valid'}")

    # Move unmatched images to validation
    for img_name in unmatched_images:
        img_file = image_basenames[img_name]
        shutil.move(os.path.join(image_dir, img_file), os.path.join(val_img_dir, img_file))
        print(f"Moved unmatched image: {img_file} to validation")

    # Move unmatched labels to validation
    for label_name in unmatched_labels:
        label_file = label_basenames[label_name]
        shutil.move(os.path.join(label_dir, label_file), os.path.join(val_label_dir, label_file))
        print(f"Moved unmatched label: {label_file} to validation")

# Example usage: source folders holding the full, unsplit dataset.
image_dir, label_dir = 'datasets/training/images', 'datasets/training/labels'

# Destination folders for the training part of the split.
train_img_dir, train_label_dir = 'train/images', 'train/labels'

# Destination folders for the validation part of the split.
val_img_dir, val_label_dir = 'valid/images', 'valid/labels'

split_dataset(
    image_dir=image_dir,
    label_dir=label_dir,
    train_img_dir=train_img_dir,
    train_label_dir=train_label_dir,
    val_img_dir=val_img_dir,
    val_label_dir=val_label_dir,
)


# Splits the dataset into training and validation sets, and moves unmatched images and labels into the validation set
