In [None]:
import ultralytics
import os
import shutil
import random
import matplotlib.pyplot as plt
import numpy as np
from ultralytics import YOLO

In [4]:
def split_dataset(images_dir, labels_dir, output_dir, train_ratio=0.8, val_ratio=0.1, test_ratio=0.1, seed=None):
    """Copy an image/label dataset into train/val/test sub-folders.

    Every ``.jpg``/``.jpeg``/``.png`` file found directly in ``images_dir`` is
    assigned to exactly one split and copied to
    ``output_dir/images/<split>/``. The label file with the same base name
    (``<name>.txt``) is copied from ``labels_dir`` to
    ``output_dir/labels/<split>/``; if no such label exists an empty label
    file is created instead.

    Args:
        images_dir: Folder containing the source images.
        labels_dir: Folder containing the ``.txt`` label files.
        output_dir: Root folder that receives ``images/`` and ``labels/``.
        train_ratio: Fraction of images assigned to the train split.
        val_ratio: Fraction of images assigned to the val split.
        test_ratio: Not used to size a split directly; the test split
            receives whatever remains after train and val are cut, so it
            absorbs the rounding remainder.
        seed: Optional seed for the shuffle. With the same seed and the same
            set of image names the assignment is identical on every run.
            ``None`` (default) shuffles with the global ``random`` state, as
            before.

    Raises:
        AssertionError: If the three ratios do not sum to 1.

    Note:
        Existing files in ``output_dir`` are never removed. Re-running with a
        different shuffle (for example with ``seed=None``) into the same
        ``output_dir`` can therefore leave one image in more than one split.
        Pass a fixed ``seed`` or start from an empty ``output_dir``.
    """
    # Sanity check
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-6, "Ratios must sum to 1"

    # Make sure output subfolders exist
    split_names = ('train', 'val', 'test')
    for kind in ('images', 'labels'):
        for split_name in split_names:
            os.makedirs(os.path.join(output_dir, kind, split_name), exist_ok=True)

    # os.listdir returns entries in arbitrary order, so sort first: the
    # shuffle then depends only on the file names and the seed.
    image_files = sorted(
        f for f in os.listdir(images_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    # A private generator keeps a seeded split from disturbing (or being
    # disturbed by) the global random state; without a seed fall back to the
    # global generator exactly as the unseeded version did.
    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(image_files)

    total = len(image_files)
    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)

    splits = {
        'train': image_files[:train_end],
        'val': image_files[train_end:val_end],
        'test': image_files[val_end:],  # remainder, including rounding leftovers
    }

    for split_name, file_list in splits.items():
        img_dst_dir = os.path.join(output_dir, 'images', split_name)
        lbl_dst_dir = os.path.join(output_dir, 'labels', split_name)

        for img_file in file_list:
            label_file = os.path.splitext(img_file)[0] + '.txt'

            src_lbl = os.path.join(labels_dir, label_file)
            dst_lbl = os.path.join(lbl_dst_dir, label_file)

            # Copy image
            shutil.copy2(os.path.join(images_dir, img_file),
                         os.path.join(img_dst_dir, img_file))

            # Copy label if it exists, otherwise create empty
            if os.path.exists(src_lbl):
                shutil.copy2(src_lbl, dst_lbl)
            else:
                # empty file for no-annotation images
                with open(dst_lbl, 'w'):
                    pass

    print("✅ Dataset split complete.")
    print(f"Total images: {total}")
    for k in splits:
        print(f"{k.capitalize()}: {len(splits[k])} images")

# Split the augmented dataset 80/10/10 into dataset/{images,labels}/{train,val,test}.
split_dataset(
    images_dir='augmented_data/images',
    labels_dir='augmented_data/labels',
    output_dir='dataset',
    train_ratio=0.8,
    val_ratio=0.1,
    test_ratio=0.1,
)

✅ Dataset split complete.
Total images: 4104
Train: 3283 images
Val: 410 images
Test: 411 images


In [2]:
# Build the detector from the 'yolov8s.pt' weights file.
model = YOLO(model='yolov8s.pt')

In [None]:
# Train the model on the split dataset. The captured log below shows this
# running on CPU at roughly 10 s per iteration.
results = model.train(
    data='dataset/custom_data.yaml',  # dataset definition file
    epochs=25,                        # log: "Starting training for 25 epochs"
    imgsz=640,                        # log: "Image sizes 640 train, 640 val"
    batch=4,                          # 3283 train images -> 821 iterations per epoch
    name='init_trained_model',        # run name; the logged run dir was init_trained_model7
)

New https://pypi.org/project/ultralytics/8.3.157 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.156  Python-3.12.4 torch-2.7.1+cpu CPU (11th Gen Intel Core(TM) i5-1135G7 2.40GHz)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=dataset/custom_data.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=25, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=init_trained_model7, nbs=64, 

[34m[1mtrain: [0mScanning C:\Users\Acer\Desktop\Pest-Detection\dataset\labels\train.cache... 3283 images, 44 backgrounds, 0 corrupt: 100%|██████████| 3283/3283 [00:00<?, ?it/s][0m


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access  (ping: 0.20.2 ms, read: 122.1115.7 MB/s, size: 234.0 KB)


[34m[1mval: [0mScanning C:\Users\Acer\Desktop\Pest-Detection\dataset\labels\val.cache... 410 images, 6 backgrounds, 0 corrupt: 100%|██████████| 410/410 [00:00<?, ?it/s][0m


Plotting labels to C:\Users\Acer\Desktop\Pest-Detection\runs\detect\init_trained_model7\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000909, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mC:\Users\Acer\Desktop\Pest-Detection\runs\detect\init_trained_model7[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/25         0G      1.693      3.936      1.838         13        640:  14%|█▍        | 115/821 [13:59<2:01:11, 10.30s/it]