In [None]:
# Attach Google Drive to this Colab runtime; the dataset paths used below all
# live under the mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
# force_remount=True is passed so the mount is redone even if one already exists.
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [None]:
from pathlib import Path
from xml.dom.minidom import parse
from shutil import copyfile
import os
import numpy as np
import pandas as pd

In [None]:
# Class names in class-id order: save_txt_file writes classes.index(name) as
# the integer id at the start of every label line, so the order here matters.
classes = ['helmet','head','person']

def convert_annot(size, box):
    """Convert a pixel corner box into a normalized centre/size box.

    Args:
        size: ``[width, height]`` of the image in pixels. Each entry is passed
            through ``int``, so ints and integer strings are both accepted.
        box: ``[xmin, ymin, xmax, ymax]`` in pixels (ints or integer strings).

    Returns:
        ``[x_center, y_center, width, height]`` as plain Python floats, with
        the x/width values divided by the image width and the y/height values
        divided by the image height.

    Raises:
        ZeroDivisionError: if the image width or height is 0.
        ValueError: if an entry cannot be converted with ``int``.
    """
    img_w, img_h = int(size[0]), int(size[1])
    x1, y1, x2, y2 = (int(value) for value in box[:4])

    box_w = x2 - x1
    box_h = y2 - y1
    x_center = x1 + box_w / 2
    y_center = y1 + box_h / 2

    # Divide directly in double precision. The previous version multiplied by
    # 1/size rounded to np.float32, which limited results to ~7 significant
    # digits and made the result type depend on NumPy's promotion rules.
    return [x_center / img_w, y_center / img_h, box_w / img_w, box_h / img_h]

In [None]:
def save_txt_file(img_jpg_file_name, size, img_box,
                  labels_dir='/content/drive/My Drive/helmet-dataset/labels/'):
    """Write the label file for one image.

    Args:
        img_jpg_file_name: base name of the output file; ``'.txt'`` is
            appended to it. (Despite the parameter name, get_xml_data passes
            the annotation's base name without any extension.)
        size: ``[width, height]`` of the image, forwarded to convert_annot.
        img_box: iterable of ``[class_name, xmin, ymin, xmax, ymax]`` entries.
        labels_dir: directory the label file is written to. Defaults to the
            location that was previously hard-coded.

    Each output line is ``<class id> <x> <y> <w> <h>``, where the class id is
    the position of ``class_name`` in the module-level ``classes`` list and the
    four numbers come from convert_annot.

    Raises:
        ValueError: if a class name is not present in ``classes``.
    """
    save_file_name = os.path.join(labels_dir, img_jpg_file_name + '.txt')

    # Build every line before touching the file so that a bad annotation
    # cannot leave a half-written label file behind.
    lines = []
    for box in img_box:
        cls_num = classes.index(box[0])
        new_box = convert_annot(size, box[1:])
        lines.append(f"{cls_num} {new_box[0]} {new_box[1]} {new_box[2]} {new_box[3]}\n")

    # 'w' rather than 'a+': re-running the conversion must replace the labels,
    # not append a second copy of every box to the existing file.
    with open(save_file_name, 'w') as label_file:
        label_file.writelines(lines)

In [None]:
def get_xml_data(file_path, img_xml_file):
    """Parse one annotation XML file and write the matching label file.

    Reads ``<file_path>/<img_xml_file>.xml``, takes ``width`` and ``height``
    from its ``size`` element and, for every ``object`` element, the ``name``
    plus the integer ``xmin``/``ymin``/``xmax``/``ymax`` values. The collected
    boxes are handed to save_txt_file under the same base name.

    Args:
        file_path: directory containing the XML file.
        img_xml_file: file name of the annotation without the ``.xml`` suffix.

    Raises:
        IndexError: if one of the elements listed above is missing or empty.
        ValueError: if a box coordinate is not an integer string.
    """
    def _text(node, tag):
        # Text of the first <tag> element found under ``node``.
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    root = parse(file_path + '/' + img_xml_file + '.xml').documentElement

    # Only width/height are needed downstream; the filename and depth elements
    # were previously read but never used, which made files lacking them fail.
    img_size = root.getElementsByTagName("size")[0]
    img_w = _text(img_size, "width")
    img_h = _text(img_size, "height")

    img_box = []
    for obj in root.getElementsByTagName("object"):
        corners = [int(_text(obj, tag)) for tag in ("xmin", "ymin", "xmax", "ymax")]
        img_box.append([_text(obj, "name")] + corners)

    save_txt_file(img_xml_file, [img_w, img_h], img_box)

In [None]:
# Convert every annotation XML in annotations/ into a label file in labels/.
os.makedirs('/content/drive/My Drive/helmet-dataset/labels',exist_ok = True)

files = os.listdir('/content/drive/My Drive/helmet-dataset/annotations')
for file in files:
    # splitext removes only the final extension, so names that contain extra
    # dots keep their full stem (split(".")[0] cut them at the first dot).
    stem, ext = os.path.splitext(file)
    # Skip anything that is not an annotation (hidden files, checkpoint
    # folders, ...); get_xml_data always opens "<stem>.xml".
    if ext != '.xml':
        continue
    get_xml_data('/content/drive/My Drive/helmet-dataset/annotations', stem)

In [None]:
from sklearn.model_selection import train_test_split

# copy_data() creates train/val/test sub-folders inside this same directory,
# so a plain os.listdir() returns those folder names as if they were images
# whenever this cell is re-run. Keep regular files only, and sort them:
# listing order is not guaranteed, and random_state only gives a repeatable
# split when the input order is repeatable too.
images_dir = '/content/drive/My Drive/helmet-dataset/images'
with os.scandir(images_dir) as entries:
    image_list = sorted(entry.name for entry in entries if entry.is_file())

# 80% train, then the remaining 20% is halved into validation and test.
train_list, test_list = train_test_split(image_list, test_size=0.2, random_state=42)
val_list, test_list = train_test_split(test_list, test_size=0.5, random_state=42)
print('total =',len(image_list))
print('train :',len(train_list))
print('val   :',len(val_list))
print('test  :',len(test_list))

total = 5003
train : 4002
val   : 500
test  : 501


In [None]:
from pathlib import Path
from shutil import copyfile
from tqdm import tqdm

def copy_data(file_list, img_labels_root, imgs_source, mode,
              dataset_root='/content/drive/My Drive/helmet-dataset/'):
    """Copy the images and label files of one split into per-split folders.

    For every entry of ``file_list`` the image ``<name>.png`` is copied from
    ``imgs_source`` to ``<dataset_root>/images/<mode>/`` and the label
    ``<name>.txt`` from ``img_labels_root`` to ``<dataset_root>/labels/<mode>/``.
    Both destination folders are created if needed.

    Args:
        file_list: image file names (a trailing ``.png`` is optional).
        img_labels_root: directory holding the ``.txt`` label files.
        imgs_source: directory holding the ``.png`` images.
        mode: split name used as the destination sub-folder and in the
            progress-bar caption.
        dataset_root: root under which ``images/<mode>`` and ``labels/<mode>``
            are created. Defaults to the location that was previously
            hard-coded.

    Raises:
        FileNotFoundError: if an image exists but its label file does not.
    """
    dataset_root = Path(dataset_root)
    imgs_source = Path(imgs_source)
    img_labels_root = Path(img_labels_root)

    # Create directories if they don't exist
    images_path = dataset_root / 'images' / mode
    labels_path = dataset_root / 'labels' / mode
    images_path.mkdir(parents=True, exist_ok=True)
    labels_path.mkdir(parents=True, exist_ok=True)

    skipped = []
    # Copying files with progress bar
    for file in tqdm(file_list, desc=f"Copying {mode} data"):
        # Strip the extension only at the end of the name; str.replace would
        # also remove a '.png' occurring in the middle of it.
        base_filename = file[:-len('.png')] if file.endswith('.png') else file

        img_src_file = imgs_source / (base_filename + '.png')
        if not img_src_file.is_file():
            # Entries that are not images (for example the split sub-folders
            # that live next to the source images) cannot be copied; record
            # them instead of aborting the whole copy half-way through.
            skipped.append(file)
            continue

        copyfile(img_src_file, images_path / (base_filename + '.png'))
        copyfile(img_labels_root / (base_filename + '.txt'),
                 labels_path / (base_filename + '.txt'))

    if skipped:
        print(f"{mode}: skipped {len(skipped)} entries without an image file: {skipped}")

# Copy each split into its own images/<mode> and labels/<mode> folders.
labels_root = '/content/drive/My Drive/helmet-dataset/labels'
images_root = '/content/drive/My Drive/helmet-dataset/images'
for split_files, split_mode in ((train_list, "train"), (val_list, "val"), (test_list, "test")):
    copy_data(split_files, labels_root, images_root, split_mode)

In [None]:
# List the images folder on Drive; copy_data also creates its train/val/test
# sub-folders inside this directory.
!ls "/content/drive/My Drive/helmet-dataset/images"

^C


In [None]:
!git clone https://github.com/ultralytics/ultralytics
!pip install ultralytics

Cloning into 'ultralytics'...
remote: Enumerating objects: 21075, done.[K
remote: Counting objects: 100% (126/126), done.[K
remote: Compressing objects: 100% (109/109), done.[K
remote: Total 21075 (delta 37), reused 62 (delta 17), pack-reused 20949[K
Receiving objects: 100% (21075/21075), 12.78 MiB | 15.10 MiB/s, done.
Resolving deltas: 100% (14568/14568), done.
Collecting ultralytics
  Downloading ultralytics-8.1.6-py3-none-any.whl (705 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m705.0/705.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.1.6


In [None]:
import yaml

# Dataset description handed to the training command through data=data.yaml.
# "names" repeats the class list in the same order as `classes`, whose
# positions are the ids written into the label files.
DATA_YAML_NAME = "data.yaml"
config = dict(
    path="/content/drive/My Drive/helmet-dataset/images",
    train="train",
    val="val",
    test="test",
    nc=3,
    names=['helmet','head','person'],
)
with open(DATA_YAML_NAME, "w") as file:
    yaml.dump(config, file, default_flow_style=False)

In [None]:
# Print the generated data.yaml to check what was written.
!cat data.yaml

names:
- helmet
- head
- person
nc: 3
path: /content/drive/My Drive/helmet-dataset/images
test: test
train: train
val: val


In [None]:
# Run a detection training job with the yolo command line: dataset taken from
# data.yaml, starting from the yolov8s.pt weights, 20 epochs, lr0=0.01.
!yolo task=detect mode=train data=data.yaml model=yolov8s.pt epochs=20 lr0=0.01

Downloading https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s.pt to 'yolov8s.pt'...
100% 21.5M/21.5M [00:00<00:00, 222MB/s]
Ultralytics YOLOv8.1.6 🚀 Python-3.10.12 torch-2.1.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=data.yaml, epochs=20, time=None, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=