In [89]:
from xml.dom.minidom import parse
import cv2
import matplotlib.pyplot as plt
import os
from sklearn.cluster import KMeans
import pickle
from scipy.spatial.distance import cdist
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
import sklearn

In [90]:
def extract_xml_data(file_path, xml_filename):
    label_path = file_path + '/' + xml_filename + '.xml'
    img_path = '/kaggle/input/hard-hat-detection/images' + '/' + xml_filename + '.png'
    domOBJ = parse(label_path)
    root = domOBJ.documentElement
    objects = root.getElementsByTagName("object")

    # for each identified and labelled object in the image, sort it and save it
    for idx, obj in enumerate(objects):
        cls_name = obj.getElementsByTagName("name")[0].childNodes[0].data
        x1 = int(obj.getElementsByTagName("xmin")[0].childNodes[0].data)
        y1 = int(obj.getElementsByTagName("ymin")[0].childNodes[0].data)
        x2 = int(obj.getElementsByTagName("xmax")[0].childNodes[0].data)
        y2 = int(obj.getElementsByTagName("ymax")[0].childNodes[0].data)
        image = cv2.imread(img_path)
#         plt.imshow(image)
        cropped_image = image[y1:y2, x1:x2]
#         plt.imshow(cropped_image)
        if cls_name == 'helmet':
            cv2.imwrite('/kaggle/working/Dataset/Helmet/' + xml_filename + f'_{idx}.png', cropped_image)
        else:
            cv2.imwrite('/kaggle/working/Dataset/No_Helmet/' + xml_filename + f'_{idx}.png', cropped_image)

In [91]:
files = os.listdir('/kaggle/input/hard-hat-detection/annotations')
files = files[:1000]
for file in files:
    file_xml = file.split(".")
    extract_xml_data('/kaggle/input/hard-hat-detection/annotations', file_xml[0])

In [92]:
label2id = {'Helmet': 0, 'No_Helmet':1}

In [93]:
train_data = []
train_label = []
datasetPath = '/kaggle/working/Dataset/' 
for label in list(label2id.keys()):
    print(label)
    images = os.listdir(datasetPath + label + '/')
    for image in images:
        img = cv2.imread(datasetPath + label + '/' + image)
        train_data.append(img)
        train_label.append(label2id[label])

# print(train_data)

Helmet
No_Helmet


In [116]:
print("Labels:\n", {'Helmet': train_label.count(0),'No-Helmet': train_label.count(1)})

Labels:
 {'Helmet': 3805, 'No-Helmet': 1220}


## SIFT Feature Extraction

In [95]:
def extract_sift_features(list_image):

    image_descriptors = []
    sift = cv2.SIFT_create()
    for image in list_image:
        _, descriptor = sift.detectAndCompute(image, None)
        image_descriptors.append(descriptor)

    return image_descriptors

In [96]:
image_descriptors = extract_sift_features(train_data)

In [97]:
all_descriptors = []
for descriptor in image_descriptors:
    if descriptor is not None:
        for des in descriptor:
            all_descriptors.append(des)

## Creating BoW

In [98]:
def kmean_bow(all_descriptors, num_cluster):
    bow_dict = []

    kmeans = KMeans(n_clusters = num_cluster)
    kmeans.fit(all_descriptors)

    bow_dict = kmeans.cluster_centers_

    if not os.path.isfile('bow_dictionary.pkl'):
        pickle.dump(bow_dict, open('bow_dictionary.pkl', 'wb'))

    return bow_dict

In [99]:
num_cluster = 60
BoW = kmean_bow(all_descriptors, num_cluster)



## Create Feature BoW

In [100]:
def create_feature_bow(image_descriptors, BoW, num_cluster):

    X_features = []

    for i in range(len(image_descriptors)):
        features = np.array([0] * num_cluster)

        if image_descriptors[i] is not None:
            distance = cdist(image_descriptors[i], BoW)

            argmin = np.argmin(distance, axis = 1)

            for j in argmin:
                features[j] += 1
        X_features.append(features)

    return X_features

In [101]:
X_features = create_feature_bow(image_descriptors, BoW, num_cluster)

[array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 2, 0, 0, 0, 1, 2, 0, 0, 2,
       1, 2, 1, 0, 1, 1, 2, 0, 0, 2, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 1, 0, 0, 0, 2, 0]), array([0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0]), array([1, 3, 0, 3, 1, 3, 1, 1, 3, 1, 1, 0, 1, 2, 1, 1, 2, 1, 0, 1, 1, 1,
       2, 2, 3, 3, 0, 0, 0, 2, 0, 3, 0, 0, 0, 1, 0, 1, 2, 2, 0, 2, 1, 2,
       0, 2, 2, 1, 1, 2, 0, 1, 2, 0, 0, 0, 1, 3, 6, 2]), array([0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 3, 3, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0]), array([1, 2, 2, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 4, 0, 0, 0, 1, 1, 0, 2, 1,
       1, 2, 1, 2, 0, 0, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 2, 0, 0, 1, 0, 0, 0, 0

## SVM Classifier

In [102]:
X_train, X_test, Y_train, Y_test = train_test_split(X_features, train_label, test_size = 0.2, random_state = 1)
model_svm = sklearn.svm.SVC(C = 30, random_state = 0)

[array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 2, 2, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]), array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
       0, 1, 0, 0, 0, 1, 1, 0, 2, 2, 0, 1, 0, 0, 0, 0]), array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), array([1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 5, 0, 1, 0, 0, 1, 2, 1, 1,
       0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 2, 0, 0, 0, 0, 0, 1

In [127]:
# print({'0': Y_test.count(0), '1': Y_test.count(1)})

{'0': 765, '1': 240}


In [103]:
model_svm.fit(X_train, Y_train)

print("score on training set params: ", model_svm.score(X_train, Y_train))

score on training set params:  0.8845771144278607


In [104]:
print("score on testing set params: ", model_svm.score(X_test, Y_test))

score on testing set params:  0.7432835820895523


In [None]:
label_path = file_path + '/' + xml_filename + '.xml'
img_path = '/kaggle/input/hard-hat-detection/images' + '/' + xml_filename + '.png'
domOBJ = parse(label_path)
root = domOBJ.documentElement
objects = root.getElementsByTagName("object")

# for each identified and labelled object in the image, sort it and save it
for idx, obj in enumerate(objects):
    cls_name = obj.getElementsByTagName("name")[0].childNodes[0].data
    x1 = int(obj.getElementsByTagName("xmin")[0].childNodes[0].data)
    y1 = int(obj.getElementsByTagName("ymin")[0].childNodes[0].data)
    x2 = int(obj.getElementsByTagName("xmax")[0].childNodes[0].data)
    y2 = int(obj.getElementsByTagName("ymax")[0].childNodes[0].data)
    image = cv2.imread(img_path)
#         plt.imshow(image)
    cropped_image = image[y1:y2, x1:x2]

In [122]:

img = cv2.imread('/kaggle/input/hard-hat-detection/images/hard_hat_workers4010.png')
train_data = [img]

image_descriptors = extract_sift_features(train_data)

all_descriptors = []
for descriptor in image_descriptors:
    if descriptor is not None:
        for des in descriptor:
            all_descriptors.append(des)
            
num_cluster = 60
BoW = kmean_bow(all_descriptors, num_cluster)

X_features = create_feature_bow(image_descriptors, BoW, num_cluster)
# print(X_features)
pred = model_svm.predict(X_features)
print(pred)



[0]


In [106]:
# filename = 'svm_model.sav'
# pickle.dump(model_svm, open(filename, 'wb'))