This is a notebook for using yolov5 on the hard hat detection data

# Importing libraries

In [None]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
from pathlib import Path
from xml.dom.minidom import parse
from shutil import copyfile
from shutil import copy2
import os

# Preparing data

The given data comes as XML annotations, so we need to convert it into the YOLOv5 format, which consists of one text annotation file per image plus a YAML file that points to the image folders.

In [None]:
#making the label and image directories in the drive
#os.makedirs creates missing parent folders and, with exist_ok=True, does not fail when the cell is re-run
for sub_dir in ('labels', 'images'):
    os.makedirs('/content/drive/MyDrive/datasets/hard_hat(yolov5)/' + sub_dir, exist_ok=True)

In [None]:
#defining classes
#NOTE: the order of this list matters -- save_txt_file writes classes.index(name) as the class id of every label line
classes = ['helmet','head','person']

In [None]:
#the yolov5 txt annotations contain x_center, y_center, width and height, each divided by the image size
def convert_annot(size, box):
    """Convert a corner-format pixel box into a YOLO-style box.

    Args:
        size: (width, height) of the image in pixels; ints or numeric strings.
        box: (xmin, ymin, xmax, ymax) in pixels; ints or numeric strings.

    Returns:
        [x_center, y_center, width, height] as plain Python floats, where the
        x/width values are divided by the image width and the y/height values
        by the image height.

    Raises:
        ZeroDivisionError: if the image width or height is 0.
    """
    img_w = int(size[0])
    img_h = int(size[1])

    #getting the corner co-ordinates
    x1 = int(box[0])
    y1 = int(box[1])
    x2 = int(box[2])
    y2 = int(box[3])

    #box width/height and centre, still in pixels
    w = x2 - x1
    h = y2 - y1
    x = x1 + (w / 2)
    y = y1 + (h / 2)

    #plain float division (instead of multiplying by an np.float32 reciprocal) so the
    #result type and precision do not depend on the installed NumPy version
    return [x / img_w, y / img_h, w / img_w, h / img_h]

Now we need to save the annotations text file for each image 

In [None]:
def save_txt_file(img_jpg_file_name, size, img_box,
                  labels_dir='/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels'):
    """Write the YOLO label file for one image.

    Args:
        img_jpg_file_name: image name without extension; the label file is
            named ``<img_jpg_file_name>.txt``.
        size: [width, height] of the image, forwarded to ``convert_annot``.
        img_box: iterable of ``[class_name, xmin, ymin, xmax, ymax]`` entries.
        labels_dir: directory the label file is written to.

    Raises:
        ValueError: if a class name is not present in ``classes``.
    """
    #making the text file according to the image name
    save_file_name = os.path.join(labels_dir, img_jpg_file_name + '.txt')
    print(save_file_name)

    #build every line before touching the file, so a bad class name cannot leave a half-written label file
    lines = []
    for box in img_box:
        #class id = position of the class name in the global `classes` list
        cls_num = classes.index(box[0])
        #convert the pixel corners into the x, y, w, h annotation
        new_box = convert_annot(size, box[1:])
        lines.append(f"{cls_num} {new_box[0]} {new_box[1]} {new_box[2]} {new_box[3]}\n")

    #'w' (not 'a+'): re-running the conversion must overwrite the file instead of
    #appending a second copy of every box
    with open(save_file_name, 'w') as file_path:
        file_path.writelines(lines)

The source annotations are XML, so we parse each XML file and convert its boxes into the YOLOv5 text format.

In [None]:
def get_xml_data(file_path, img_xml_file):
    """Parse one XML annotation file and write the matching YOLO label file.

    Args:
        file_path: directory that contains the XML annotation.
        img_xml_file: annotation file name without the ``.xml`` extension; it is
            also used as the name of the label file.

    Reads the image ``<size>`` (width, height) and every ``<object>`` entry
    (name, xmin, ymin, xmax, ymax), then passes them to ``save_txt_file``.
    """
    xml_path = file_path + '/' + img_xml_file + '.xml'
    root = parse(xml_path).documentElement

    def _text(node, tag):
        #text content of the first <tag> element found below `node`
        return node.getElementsByTagName(tag)[0].childNodes[0].data

    #only width and height are needed; the previously read <filename> and <depth>
    #values were never used, so they are no longer looked up
    img_size = root.getElementsByTagName("size")[0]
    img_w = _text(img_size, "width")
    img_h = _text(img_size, "height")

    #one [class_name, xmin, ymin, xmax, ymax] entry per annotated object
    img_box = [
        [
            _text(obj, "name"),
            int(_text(obj, "xmin")),
            int(_text(obj, "ymin")),
            int(_text(obj, "xmax")),
            int(_text(obj, "ymax")),
        ]
        for obj in root.getElementsByTagName("object")
    ]

    save_txt_file(img_xml_file, [img_w, img_h], img_box)

In [None]:
#list the contents of the labels directory
!ls '/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels'

Getting the xml data and converting it to text format using the function get_xml_data

In [None]:
#convert every XML annotation in the folder into a YOLO label file
annotations_dir = '/content/drive/MyDrive/datasets/customtf2/annotations'
files = os.listdir(annotations_dir)
for file in files:
    #splitext only removes the final extension, so names containing extra dots are kept intact
    file_stem, file_ext = os.path.splitext(file)
    if file_ext.lower() != '.xml':
        #anything that is not an XML annotation cannot be parsed, so skip it
        continue
    print("file name: ", file)
    print(file_stem)
    get_xml_data(annotations_dir, file_stem)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
hard_hat_workers209
/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels/hard_hat_workers209.txt
file name:  hard_hat_workers4719.xml
hard_hat_workers4719
/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels/hard_hat_workers4719.txt
file name:  hard_hat_workers2567.xml
hard_hat_workers2567
/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels/hard_hat_workers2567.txt
file name:  hard_hat_workers4436.xml
hard_hat_workers4436
/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels/hard_hat_workers4436.txt
file name:  hard_hat_workers4891.xml
hard_hat_workers4891
/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels/hard_hat_workers4891.txt
file name:  hard_hat_workers4813.xml
hard_hat_workers4813
/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels/hard_hat_workers4813.txt
file name:  hard_hat_workers2402.xml
hard_hat_workers2402
/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels/hard_hat_workers2402.txt
f

# Splitting Data

In [None]:
#we need to split our data into train and validation sets
from sklearn.model_selection import train_test_split
#os.listdir returns names in arbitrary order, so sort first: otherwise random_state=42
#does not give the same split from one run to the next
image_list = sorted(os.listdir('/content/drive/MyDrive/datasets/customtf2/images'))
#splitting our data in the ratio of 0.8 and 0.2
train_list, val_list = train_test_split(image_list, test_size=0.2, random_state=42)
print('total =',len(image_list))
print('train :',len(train_list))
print('val   :',len(val_list))

total = 4750
train : 3800
val   : 950


This is the function for copying the images and their label files into their respective train and val folders.

In [None]:
def copy_data(file_list, img_labels_root, imgs_source, mode,
              dataset_root='/content/drive/MyDrive/datasets/hard_hat(yolov5)'):
    """Copy images and their label files into the ``mode`` split folders.

    Args:
        file_list: image file names (with extension) belonging to this split.
        img_labels_root: directory holding the ``<name>.txt`` label files.
        imgs_source: directory holding the source images.
        mode: split name, used as the sub-folder name (e.g. "train" or "val").
        dataset_root: dataset folder; files are copied to
            ``<dataset_root>/images/<mode>`` and ``<dataset_root>/labels/<mode>``.

    Images whose label file is missing from ``img_labels_root`` are skipped
    with a message; nothing is copied for them.
    """
    images_dir = Path(dataset_root) / 'images' / mode
    labels_dir = Path(dataset_root) / 'labels' / mode

    #create the destination folders for images and labels when they are missing
    for target_dir in (images_dir, labels_dir):
        if not target_dir.exists():
            print(f"Path {target_dir} does not exit")
        os.makedirs(target_dir, exist_ok=True)

    for file in file_list:
        #splitext strips only the real extension (str.replace would also remove '.png' inside the name)
        img_name = os.path.splitext(file)[0]
        img_src_file = os.path.join(imgs_source, file)
        label_src_file = os.path.join(img_labels_root, img_name + '.txt')

        #check the label where the caller said the labels are, not at a hard-coded location
        if not os.path.exists(label_src_file):
            print("Corresponding label file doesn't exist")
            continue

        # Copying image (the original file name and extension are kept)
        copyfile(img_src_file, images_dir / file)

        # Copying label
        copy2(label_src_file, labels_dir / (img_name + '.txt'))

In [None]:
#copy each split into its own images/<split> and labels/<split> folder
labels_root = '/content/drive/MyDrive/datasets/hard_hat(yolov5)/labels'
images_source = '/content/drive/MyDrive/datasets/customtf2/images'
for split_name, split_files in (("train", train_list), ("val", val_list)):
    copy_data(split_files, labels_root, images_source, split_name)

Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file doesn't exist
Corresponding label file 

We now have separate train and validation sets, so we can train our YOLO model.

# Getting YOLOv5 model

In [None]:
#clone the YOLOv5 repository and make the cloned folder the working directory
!git clone https://github.com/ultralytics/yolov5
%cd yolov5

Cloning into 'yolov5'...
remote: Enumerating objects: 14513, done.
remote: Counting objects: 100% (36/36), done.
remote: Compressing objects: 100% (22/22), done.
remote: Total 14513 (delta 19), reused 24 (delta 14), pack-reused 14477
Receiving objects: 100% (14513/14513), 13.66 MiB | 10.92 MiB/s, done.
Resolving deltas: 100% (9991/9991), done.
/content/yolov5


# Creating YAML file

In [None]:
import yaml

#class names; the order must match the class ids written into the label files
class_names = ['helmet','head','person']

#dataset description: image folders of each split, number of classes and class names
dict_file = {'train':'/content/drive/MyDrive/datasets/hard_hat(yolov5)/images/train' ,
            'val': '/content/drive/MyDrive/datasets/hard_hat(yolov5)/images/val',
            #nc is a count, so store it as an integer (derived from the names) rather than the string '3'
            'nc' : len(class_names),
            'names' : class_names}

#NOTE(review): there is no '/' between 'hard_hat(yolov5)' and 'hard_head.yaml', so the file is
#created in the 'datasets' folder as 'hard_hat(yolov5)hard_head.yaml' -- confirm whether that is
#intended before changing it, since cells that read this file must use the same path
with open('/content/drive/MyDrive/datasets/hard_hat(yolov5)hard_head.yaml', 'w') as file:
    yaml.dump(dict_file, file)