In [None]:
import os
from ultralytics import YOLO
import shutil
import random
import zipfile

# Seed Python's global RNG so the shuffle used for the dataset split is repeatable.
random.seed(a=42)

In [None]:
# Unpack dataset.zip into the local "dataset" folder
path = "./dataset.zip"
with zipfile.ZipFile(path, mode='r') as zip_ref:
    print("Extracting dataset...")
    zip_ref.extractall(path='dataset')

# Root of the extracted dataset plus its "images" and "labels" sub-folders
dataset_path = "./dataset"
images_path, labels_path = (
    os.path.join(dataset_path, subdir) for subdir in ("images", "labels")
)

In [None]:
# Split ratios
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 1 - train_ratio - val_ratio

# Create <dataset>/<split>/images and <dataset>/<split>/labels for every split.
# exist_ok=True keeps this cell safe to re-run.
for set_type in ["train", "val", "test"]:
    for content_type in ["images", "labels"]:
        print(f"Creating {set_type} {content_type} directory...")
        os.makedirs(os.path.join(dataset_path, set_type, content_type), exist_ok=True)

# os.listdir() returns entries in arbitrary order, so the listing is sorted
# first; otherwise the seeded shuffle would still yield a different split on
# another machine or filesystem.
all_files = sorted(
    file for file in os.listdir(images_path)
    if os.path.isfile(os.path.join(images_path, file))
)
random.shuffle(all_files)

In [None]:
# Turn the split ratios into cut points inside the shuffled file list
total_files = len(all_files)
train_end = int(total_files * train_ratio)
val_end = int(total_files * (train_ratio + val_ratio))

# Carve the list into three consecutive, non-overlapping slices
train_files, val_files, test_files = (
    all_files[:train_end],
    all_files[train_end:val_end],
    all_files[val_end:],
)

def copy_files(files, set_type, images_dir=None, labels_dir=None, output_dir=None):
    """Copy images and their matching label files into one split folder.

    Each image is copied to ``<output_dir>/<set_type>/images`` and the label
    file with the same stem and a ``.txt`` extension is copied to
    ``<output_dir>/<set_type>/labels``. Images without a label file are still
    copied (they are reported at the end) instead of aborting the whole copy
    with ``FileNotFoundError`` halfway through.

    Args:
        files: Image file names (not full paths) located in ``images_dir``.
        set_type: Name of the split sub-folder, e.g. ``"train"``.
        images_dir: Folder holding the source images. Defaults to the
            notebook-level ``images_path``.
        labels_dir: Folder holding the source label files. Defaults to the
            notebook-level ``labels_path``.
        output_dir: Folder under which the split folder lives. Defaults to the
            notebook-level ``dataset_path``.

    Raises:
        FileNotFoundError: If one of the listed images does not exist.
    """
    # Fall back to the notebook globals only when no override is given, so the
    # original two-argument calls behave as before.
    src_images = images_path if images_dir is None else images_dir
    src_labels = labels_path if labels_dir is None else labels_dir
    dest_root = dataset_path if output_dir is None else output_dir

    dest_images = os.path.join(dest_root, set_type, "images")
    dest_labels = os.path.join(dest_root, set_type, "labels")
    # Make sure the targets are directories; copying into a missing folder
    # would otherwise fail or create a plain file with the folder's name.
    os.makedirs(dest_images, exist_ok=True)
    os.makedirs(dest_labels, exist_ok=True)

    print(f"Copying {set_type} files...")
    unlabeled = []
    for file in files:
        shutil.copy(os.path.join(src_images, file), dest_images)
        label_file = os.path.splitext(file)[0] + ".txt"
        label_src = os.path.join(src_labels, label_file)
        if os.path.isfile(label_src):
            shutil.copy(label_src, dest_labels)
        else:
            unlabeled.append(file)

    if unlabeled:
        print(
            f"Warning: {len(unlabeled)} {set_type} image(s) have no label file "
            f"and were copied without one: {unlabeled}"
        )
        
# Fill each split folder from its slice of the shuffled file list
for split_name, split_files in (
    ("train", train_files),
    ("val", val_files),
    ("test", test_files),
):
    copy_files(split_files, split_name)

print("Dataset split complete.")

In [None]:
# Classes
# One class name per line. Trailing blank lines are dropped so they do not
# become extra, empty classes; blank lines elsewhere are kept so the position
# of every remaining name is unchanged.
with open("dataset/classes.txt", "r") as file:
    class_names = [name.strip() for name in file]
while class_names and not class_names[-1]:
    class_names.pop()

print(class_names)

# Creating a data.yaml file with absolute paths
# `__file__` is not defined inside a notebook kernel (it raised NameError here),
# so the paths are anchored at the current working directory, the same base
# every other relative path in this notebook uses.
current_dir = os.getcwd()
with open("dataset/data.yaml", "w") as file:
    for split in ("train", "val", "test"):
        split_dir = os.path.join(current_dir, "dataset", split)
        file.write(f"{split}: {split_dir}\n")
    file.write(f"nc: {len(class_names)}\n")
    # The list repr (e.g. ['cat', 'dog']) is written as-is for the names entry.
    file.write(f"names: {class_names}\n")

In [None]:
# Build the model object from the "yolov8x" identifier
model = YOLO("yolov8x")

# Optional single-image prediction, left disabled:
# model.predict("./input_images/image.png", save=True)

# Train for 5 epochs with imgsz=640 using the dataset config written above
results = model.train(
    data="dataset/data.yaml",
    epochs=5,
    imgsz=640,
)