# Importing libraries

In [None]:
import numpy as np
import pandas as pd
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import glob
import xml.etree.ElementTree as ET

# Getting the TensorFlow Object Detection API


In [None]:
# Clone the TensorFlow models repository; later cells expect it under /content/models.
!git clone --q https://github.com/tensorflow/models.git

In [None]:
# Work from the research/ directory of the cloned repository; the next cells use paths relative to it.
%cd /content/models/research

In [None]:
# Compile the object_detection .proto files into Python modules, generated relative to the current directory.
!protoc object_detection/protos/*.proto --python_out=.

In [None]:
# Install the TensorFlow Object Detection API: copy the TF2 setup script into the
# current directory and pip-install the package from there.
!cp object_detection/packages/tf2/setup.py .
!python -m pip install .

In [None]:
# Run the model builder test script as a check that the installation works.
!python object_detection/builders/model_builder_tf2_test.py

As we can see, the model builder test runs successfully, so the Object Detection API is installed correctly.

# Data processing

In [None]:
# Navigate to the Drive folder that holds the dataset; the archive is unzipped there in the next cell.
# NOTE(review): assumes Google Drive is already mounted at /content/drive - the mount step is not shown in this notebook.
%cd /content/drive/MyDrive/datasets/customtf2

In [None]:
# Unzip the dataset archive (annotations and images) into the current directory.
!unzip /content/drive/MyDrive/datasets/customtf2/HardHat_Dataset.zip -d .

# Dividing into train and test data
The labels need to be split into a training set (80%) and a test set (20%).

In [None]:
# Create one directory for the training annotations and one for the testing annotations.
!mkdir test_labels train_labels

# There are 4750 annotations in total, so 20% (950) go to the test set and the remaining 3800 to the training set.
# `sort -R` puts the listing in random order, so a different set of 950 files is picked on every run (no seed is set).
!ls annotations/* | sort -R | head -950 | xargs -I{} mv {} test_labels/


# Move the remaining annotations (3800 labels) to the training dir: `train_labels`
!ls annotations/* | xargs -I{} mv {} train_labels/

# Creating the CSV files of annotations and a “label_map.pbtxt” file

In [None]:
#given in the tensorflow custom detection repository
def xml_to_csv(path):
  """Flatten the XML annotation files found in `path` into one table.

  Every `*.xml` file directly inside `path` is parsed, and each of its
  `<object>` elements becomes one row holding the image file name, the image
  width and height, the object's class name and its bounding box corners.

  Args:
    path: Directory that contains the XML annotation files.

  Returns:
    A tuple `(xml_df, classes_names)`: `xml_df` is a pandas DataFrame with the
    columns filename, width, height, class, xmin, ymin, xmax, ymax (one row per
    annotated object), and `classes_names` is the sorted list of the distinct
    class names that were encountered.
  """
  classes_names = []
  xml_list = []

  for xml_file in glob.glob(path + '/*.xml'):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for member in root.findall('object'):
      # The class name is read from the first child element of <object>.
      class_name = member[0].text
      # Record the class; without this the returned class list was always
      # empty, so the label map built from it contained no items.
      classes_names.append(class_name)
      bndbox = member.find("bndbox")
      value = (root.find('filename').text,
               int(root.find('size').find('width').text),
               int(root.find('size').find('height').text),
               class_name,
               int(bndbox.find('xmin').text),
               int(bndbox.find('ymin').text),
               int(bndbox.find('xmax').text),
               int(bndbox.find('ymax').text)
               )
      xml_list.append(value)
  column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
  xml_df = pd.DataFrame(xml_list, columns=column_name)
  # De-duplicate and sort so the class order (and hence the label ids) is stable.
  classes_names = sorted(set(classes_names))
  return xml_df, classes_names

In [None]:
# Convert the XML annotations of both splits to CSV and collect the class names.
# `classes` used to be overwritten on every iteration, so only the names found in the
# last split (test_labels) were kept; the names of both splits are now merged.
all_classes = set()
for label_path in ['train_labels', 'test_labels']:
  xml_df, split_classes = xml_to_csv(label_path)
  # Writes train_labels.csv / test_labels.csv into the current directory, without the index column.
  xml_df.to_csv(f'{label_path}.csv', index=False)
  all_classes.update(split_classes)
  print(f'Successfully converted {label_path} xml to csv.')

# Sorted so the ids assigned when building the label map come out in a stable order.
classes = sorted(all_classes)

In [None]:
# Start with an empty text buffer for the label map and name the file
# (in the current directory) that it will be written to.
pbtxt_content = ""
label_map_path = "label_map.pbtxt"

In [None]:
# Build the label map text: one `item` entry per class name, with ids starting at 1.
# The text is rebuilt from scratch on every run, so re-executing this cell no longer
# appends a second copy of every entry to what an earlier run left in pbtxt_content.
pbtxt_content = "".join(
    "item {{\n    id: {0}\n    name: '{1}'\n}}\n\n".format(class_id, class_name)
    for class_id, class_name in enumerate(classes, start=1)
)

In [None]:
# Trim the surrounding whitespace from the label map text, then save it to disk.
pbtxt_content = pbtxt_content.strip()
with open(label_map_path, mode="w") as label_map_file:
    label_map_file.write(pbtxt_content)
    print('Successfully created label_map.pbtxt ')

The content of label_map.pbtxt looks like this: <br>
item {<br>
  id: 1<br>
  name: 'helmet'<br>
}

item {<br>
  id: 2<br>
  name: 'head'<br>
}

item {<br>
  id: 3<br>
  name: 'person'<br>
}


# Creating TF record files

In [None]:
# Load the generated training annotations so the class balance can be inspected.
train_csv = pd.read_csv(
    '/content/drive/MyDrive/datasets/customtf2/train_labels.csv'
)

In [None]:
# Number of annotated objects per class in the training split.
train_csv["class"].value_counts()

helmet    14445
head       4506
person      602
Name: class, dtype: int64

In [None]:
# Build train.record: the script is given the training CSV, the label map, the images/ directory
# and the output file name (argument meaning presumed from their names - confirm against generatetfrecord.py).
!python /content/drive/MyDrive/datasets/customtf2/generatetfrecord.py train_labels.csv  label_map.pbtxt images/ train.record

In [None]:
# Build test.record: same script, this time given the test CSV and the test output file name
# (argument meaning presumed from their names - confirm against generatetfrecord.py).
!python /content/drive/MyDrive/datasets/customtf2/generatetfrecord.py test_labels.csv  label_map.pbtxt images/ test.record

# Using a pre-trained model from the TF2 API

We will use `ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8` for this purpose.

In [None]:
# Download the pre-trained ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8 archive and
# extract it into the current directory.
!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz
!tar -xzvf ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz

In [None]:
# Copy (not move) the pipeline configuration file of this model into the dataset directory.
# NOTE(review): the copied config presumably still has to be edited (number of classes, checkpoint,
# label map and record paths) before training - that step is not shown in this notebook.
!cp /content/models/research/object_detection/configs/tf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config /content/drive/MyDrive/datasets/customtf2

# Training the model

In [None]:
# Switch to the object_detection directory, where the training and export scripts used below live.
%cd /content/models/research/object_detection

/content/models/research/object_detection


In [None]:
# Start training with the pipeline config copied to the dataset directory.
# A space was missing before --alsologtostderr, which fused the flag onto the --model_dir value
# ("ckpt-0--alsologtostderr") instead of passing it as a separate option.
# NOTE(review): --model_dir is where training checkpoints are written; pointing it at the pre-trained
# checkpoint/ckpt-0 path looks unintended, and the export cell reads from a different directory - confirm.
!python model_main_tf2.py --pipeline_config_path=/content/drive/MyDrive/datasets/customtf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.config --model_dir=/content/drive/MyDrive/datasets/customtf2/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0 --alsologtostderr

# Exporting the model

In [None]:
# Export the trained model into the model_50000 directory on Drive.
# NOTE(review): the checkpoint directory and pipeline.config are read from under
# /content/models/research/object_detection, whereas the training cell uses paths under
# /content/drive/MyDrive/datasets/customtf2 - confirm these point at the trained checkpoints.
!python exporter_main_v2.py --trained_checkpoint_dir=/content/models/research/object_detection/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint --pipeline_config_path=/content/models/research/object_detection/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config --output_directory /content/drive/MyDrive/datasets/customtf2/model_50000