In [52]:
import os
import math
import random
import numpy as np
import pandas as pd
from PIL import Image, ExifTags
from ultralytics import YOLO
import matplotlib.pyplot as plt 

random.seed(1234)

In [53]:
# preprocess data functions
# image size (to check)
def get_img_size(path):
    image = Image.open(path)
    width, height = image.size
    return(width, height)

# split the data into training and validation
def fill_training_folders(path_oil, path_no_oil, output_dir, size):
    """
    Fills the trainings image folders with resized images

    Args:
        path_oil (str): path to oil photos
        path_no_oil (str): path to no oil photos
        output_dir (str): directory of saved photos
        size (tuple): max dimensions of output size

    Returns:
        None
    """
    # source_folders = ['./pics/oil_pics', './pics/no_oil_pics']
    source_folders = [path_oil, path_no_oil]
    
    for idx, folder in enumerate(source_folders):
        has_oil = idx == 0
        images = []

        for filename in os.listdir(folder):
            input_path = os.path.join(folder, filename)
            
            # Check if the file is an image
            try:
                with Image.open(input_path) as img:
                    exif = img._getexif()
                    if exif is not None:
                        orientation_tag = next(
                            (key for key, value in ExifTags.TAGS.items() if value == "Orientation"), None
                        )
                        if orientation_tag and orientation_tag in exif:
                            orientation = exif[orientation_tag]
                            if orientation == 3:  # Rotated 180 degrees
                                img = img.rotate(180, expand=True)
                            elif orientation == 6:  # Rotated 270 degrees (90 CW)
                                img = img.rotate(270, expand=True)
                            elif orientation == 8:  # Rotated 90 degrees (90 CCW)
                                img = img.rotate(90, expand=True)
                    # img.thumbnail((max_width, max_height))
                    img.thumbnail(size)
                    
                    # Convert to RGB if not already in this mode
                    img = img.convert("RGB")

                    images.append((filename, img))
                    
                    print(f"Processed: {filename}")
            except Exception as e:
                print(f"Skipped {filename}: {e}")

        random.shuffle(images)

        # split into train, val, test (80, 10, 10)
        train_end_dex = math.floor(len(images) * .8 // 1)
        train = images[:train_end_dex]
        
        val_end_dex = train_end_dex + math.floor(len(images) * .1 // 1)
        val = images[train_end_dex:val_end_dex]

        test = images[val_end_dex:]

        for imgs, location in [(train, 'train'), (val, 'val'), (test, 'test')]:
            sub_dir = 'oil' if has_oil else 'no_oil'
            output_folder = f'{output_dir}/{location}/{sub_dir}'
            os.makedirs(output_folder, exist_ok=True)
            for fn, img in imgs:
                output_path = os.path.join(output_folder, fn)
                img.save(output_path, "JPEG")

    print("Completed Image Processing and Splitting")

In [None]:
# preprocess data
input_path_no_oil = "./pics/no_oil_pics"
input_path_oil = "./pics/oil_pics"
output_dir = "./oil_detection_training/datasets"
size = (50, 50)

# getting original photo size
# og_file_path = os.path.join(input_path_no_oil, '0000.jpg')
# size = get_img_size(og_file_path)

# resize and convert
fill_training_folders(input_path_no_oil, input_path_oil, output_dir, size)

Processed: 0071.jpg
Processed: 0065.jpg
Processed: 0059.jpg
Processed: 0105.jpg
Processed: 0111.jpg
Processed: 0110.jpg
Processed: 0104.jpg
Processed: 0058.jpg
Processed: 0064.jpg
Processed: 0070.jpg
Processed: 0066.jpg
Processed: 0072.jpg
Processed: 0099.jpg
Processed: 0112.jpg
Processed: 0106.jpg
Processed: 0107.jpg
Processed: 0113.jpg
Processed: 0098.jpg
Processed: 0073.jpg
Processed: 0067.jpg
Processed: 0063.jpg
Processed: 0077.jpg
Processed: 0088.jpg
Processed: 0117.jpg
Processed: 0103.jpg
Processed: 0102.jpg
Processed: 0116.jpg
Processed: 0089.jpg
Processed: 0076.jpg
Processed: 0062.jpg
Processed: 0048.jpg
Processed: 0074.jpg
Processed: 0060.jpg
Processed: 0100.jpg
Processed: 0114.jpg
Processed: 0115.jpg
Processed: 0101.jpg
Processed: 0061.jpg
Processed: 0075.jpg
Processed: 0049.jpg
Processed: 0012.jpg
Processed: 0006.jpg
Processed: 0007.jpg
Processed: 0013.jpg
Processed: 0005.jpg
Processed: 0011.jpg
Processed: 0039.jpg
Processed: 0038.jpg
Processed: 0010.jpg
Processed: 0004.jpg


In [None]:
# Load a model
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)

# Train the model, set path to desired image size dir.
path = "./oil_detection_training/datasets"
model.train(data=path, epochs=5, device='mps', seed=1234)

Ultralytics 8.3.49 🚀 Python-3.10.8 torch-2.5.1 MPS (Apple M1 Pro)
[34m[1mengine/trainer: [0mtask=classify, mode=train, model=yolov8n-cls.pt, data=/Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets, epochs=5, time=None, patience=100, batch=16, imgsz=224, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=train7, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=1234, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=Fals

[34m[1mtrain: [0mScanning /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/train... 205 images, 0 corrupt: 100%|██████████| 205/205 [00:00<00:00, 3239.37it/s]

[34m[1mtrain: [0mNew cache created: /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/train.cache



[34m[1mval: [0mScanning /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/val... 25 images, 0 corrupt: 100%|██████████| 25/25 [00:00<00:00, 3004.09it/s]

[34m[1mval: [0mNew cache created: /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/val.cache
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 26 weight(decay=0.0), 27 weight(decay=0.0005), 27 bias(decay=0.0)





[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 224 train, 224 val
Using 0 dataloader workers
Logging results to [1mruns/classify/train7[0m
Starting training for 5 epochs...

      Epoch    GPU_mem       loss  Instances       Size


        1/5      1.36G      0.739         13        224: 100%|██████████| 13/13 [00:02<00:00,  5.93it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 1/1 [00:00<00:00, 13.94it/s]

                   all       0.48          1






      Epoch    GPU_mem       loss  Instances       Size


        2/5      1.33G     0.6651         13        224: 100%|██████████| 13/13 [00:01<00:00,  9.09it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 1/1 [00:00<00:00, 24.19it/s]

                   all        0.8          1






      Epoch    GPU_mem       loss  Instances       Size


        3/5      1.32G     0.6189         13        224: 100%|██████████| 13/13 [00:01<00:00,  8.82it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 1/1 [00:00<00:00, 20.71it/s]

                   all       0.88          1






      Epoch    GPU_mem       loss  Instances       Size


        4/5      1.33G     0.5682         13        224: 100%|██████████| 13/13 [00:01<00:00,  8.25it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 1/1 [00:00<00:00, 16.27it/s]

                   all       0.96          1






      Epoch    GPU_mem       loss  Instances       Size


        5/5      1.32G     0.5096         13        224: 100%|██████████| 13/13 [00:01<00:00,  9.65it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 1/1 [00:00<00:00, 20.52it/s]

                   all       0.96          1






5 epochs completed in 0.003 hours.
Optimizer stripped from runs/classify/train7/weights/last.pt, 3.0MB
Optimizer stripped from runs/classify/train7/weights/best.pt, 3.0MB

Validating runs/classify/train7/weights/best.pt...
Ultralytics 8.3.49 🚀 Python-3.10.8 torch-2.5.1 MPS (Apple M1 Pro)
YOLOv8n-cls summary (fused): 73 layers, 1,437,442 parameters, 0 gradients, 3.3 GFLOPs
[34m[1mtrain:[0m /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/train... found 205 images in 2 classes ✅ 
[34m[1mval:[0m /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/val... found 25 images in 2 classes ✅ 
[34m[1mtest:[0m /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/test... found 27 images in 2 classes ✅ 


               classes   top1_acc   top5_acc: 100%|██████████| 1/1 [00:00<00:00,  8.78it/s]


                   all       0.96          1
Speed: 0.1ms preprocess, 0.4ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/train7[0m


ultralytics.utils.metrics.ClassifyMetrics object with attributes:

confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x31059db10>
curves: []
curves_results: []
fitness: 0.9799999892711639
keys: ['metrics/accuracy_top1', 'metrics/accuracy_top5']
results_dict: {'metrics/accuracy_top1': 0.9599999785423279, 'metrics/accuracy_top5': 1.0, 'fitness': 0.9799999892711639}
save_dir: PosixPath('runs/classify/train7')
speed: {'preprocess': 0.08543968200683594, 'inference': 0.38234710693359375, 'loss': 8.58306884765625e-05, 'postprocess': 0.0001239776611328125}
task: 'classify'
top1: 0.9599999785423279
top5: 1.0

In [56]:
# Validate the model
metrics = model.val() # no arguments needed, dataset and settings remembered
metrics.top1 # top1 accuracy
metrics.top5 # top5 accuracy

Ultralytics 8.3.49 🚀 Python-3.10.8 torch-2.5.1 MPS (Apple M1 Pro)
YOLOv8n-cls summary (fused): 73 layers, 1,437,442 parameters, 0 gradients, 3.3 GFLOPs
[34m[1mtrain:[0m /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/train... found 205 images in 2 classes ✅ 
[34m[1mval:[0m /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/val... found 25 images in 2 classes ✅ 
[34m[1mtest:[0m /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/test... found 27 images in 2 classes ✅ 


[34m[1mval: [0mScanning /Users/anthony/Desktop/Folder/mystuff/249/OilDetector/oil_detection_training/datasets/val... 25 images, 0 corrupt: 100%|██████████| 25/25 [00:00<?, ?it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 2/2 [00:00<00:00, 20.73it/s]


                   all       0.96          1
Speed: 0.1ms preprocess, 0.7ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/classify/train72[0m


1.0

In [None]:
# performance metrics calculations
# test_folder = './data/test'
test_folder = "./oil_detection_training/datasets/test"

pred_map = {
    'no_oil': 0,
    "oil": 1
}

total_loss = 0
image_count = 0
correct_count = 0

for sub_dir in ['no_oil', 'oil']:
    answer = pred_map[sub_dir]
    input_folder = f'{test_folder}/{sub_dir}'

    for filename in os.listdir(input_folder):
        input_path = os.path.join(input_folder, filename)
        test_img = Image.open(input_path)
        res = model.predict(test_img)[0]

        probabilities = res.probs
        prediction = probabilities.top1
        if prediction == answer:
            correct_count += 1

        confidence = probabilities.top1conf.item()
        loss = 1 - confidence
        total_loss += loss
        image_count += 1


accuracy = correct_count/image_count
average_loss = total_loss/image_count

print(f'Accuracy: {accuracy}, avg loss: {average_loss}')


0: 224x224 no_oil 0.74, oil 0.26, 4.6ms
Speed: 3.8ms preprocess, 4.6ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 no_oil 0.75, oil 0.25, 5.2ms
Speed: 2.1ms preprocess, 5.2ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 no_oil 0.58, oil 0.42, 5.1ms
Speed: 1.9ms preprocess, 5.1ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 no_oil 0.82, oil 0.18, 37.8ms
Speed: 3.2ms preprocess, 37.8ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 no_oil 0.57, oil 0.43, 4.9ms
Speed: 2.0ms preprocess, 4.9ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 no_oil 0.66, oil 0.34, 5.1ms
Speed: 1.5ms preprocess, 5.1ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 no_oil 0.64, oil 0.36, 5.3ms
Speed: 2.4ms preprocess, 5.3ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 no_oil 0.64, oil 0.36, 4.9m