In [None]:
# Mount Google Drive at /content/gdrive; the dataset paths used in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import xml.etree.ElementTree as ET

def read_anntation(xml_file: str):
    """Parse one Pascal VOC style XML annotation file.

    Args:
        xml_file: Path of the XML annotation file to read.

    Returns:
        A tuple ``(bounding_box_list, file_name)``. ``bounding_box_list`` holds
        one ``[label, x_min, y_min, x_max, y_max]`` list for every ``<bndbox>``
        found inside an ``<object>`` element; ``file_name`` is the text of the
        root's ``<filename>`` element.
    """
    root = ET.parse(xml_file).getroot()
    file_name = root.find('filename').text

    bounding_box_list = []
    for obj in root.iter('object'):
        object_label = obj.find("name").text
        for box in obj.findall("bndbox"):
            # The entry is built inside this loop on purpose: an <object>
            # without a <bndbox> must not inherit the coordinates parsed for
            # the previous object, so it simply contributes no entry.
            bounding_box_list.append([
                object_label,
                int(box.find("xmin").text),
                int(box.find("ymin").text),
                int(box.find("xmax").text),
                int(box.find("ymax").text),
            ])

    return bounding_box_list, file_name

In [None]:
from os import listdir
import cv2
import numpy as np

def read_train_dataset(dir):
    """Load every ``.jpg``/``.png`` image in ``dir`` together with its XML annotation.

    For each image ``<stem>.<ext>`` the annotation is read from ``<stem>.xml``
    in the same folder via ``read_anntation``.

    Args:
        dir: Folder containing the images and their XML files. A trailing
            path separator is optional.

    Returns:
        A tuple ``(images, annotations)``. ``images`` is a NumPy array with one
        entry per loaded image; ``annotations`` is a parallel list of
        ``(bounding_box_list, annotation_file, file_name)`` tuples.
    """
    # Function-scope import: the surrounding cell only imports os.listdir.
    from os.path import join, splitext

    image_extensions = ('.jpg', '.png')
    images = []
    annotations = []

    # Sorted so that the image order (and anything seeded downstream) is
    # reproducible; os.listdir returns entries in arbitrary order.
    for file in sorted(listdir(dir)):
        stem, extension = splitext(file)
        # Compare the real extension; a substring test would also pick up
        # names such as "jpg_notes.txt" or "photo.png.bak".
        if extension.lower() not in image_extensions:
            continue

        image_path = join(dir, file)
        image = cv2.imread(image_path, 1)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            print('Skipping unreadable image: ' + image_path)
            continue

        # Swap only the extension; str.replace would also rewrite the same
        # text elsewhere in the name ("jpg_01.jpg" -> "xml_01.xml").
        annotation_file = stem + '.xml'
        bounding_box_list, file_name = read_anntation(join(dir, annotation_file))

        images.append(image)
        annotations.append((bounding_box_list, annotation_file, file_name))

    if len({image.shape for image in images}) > 1:
        # Differently sized images cannot be stacked into one regular array
        # (recent NumPy raises ValueError), so keep them in a 1-D object array.
        image_array = np.empty(len(images), dtype=object)
        for position, image in enumerate(images):
            image_array[position] = image
    else:
        image_array = np.array(images)

    return image_array, annotations

In [None]:
!pip install files

Collecting files
  Downloading https://files.pythonhosted.org/packages/d3/b0/ddfcb3c51ad496bf665d80d3465d8c274592d70312558627c803107e7d7d/files-1.1.1.tar.gz
Building wheels for collected packages: files
  Building wheel for files (setup.py) ... [?25l[?25hdone
  Created wheel for files: filename=files-1.1.1-cp36-none-any.whl size=3671 sha256=b89da0414f80698f0f22af3d48e28e78312d3b2579949c427ed7d8705174a9f0
  Stored in directory: /root/.cache/pip/wheels/2f/b7/02/31aeb19506812ddfcf90210ea4f9f645fd5688f28dc312683b
Successfully built files
Installing collected packages: files
Successfully installed files-1.1.1


In [None]:
!pip install pascal-voc-writer

Collecting pascal-voc-writer
  Downloading https://files.pythonhosted.org/packages/9d/82/dd86999e6062fc34478f11ead7a68e6615d7e270b39624547edd1dbaba76/pascal_voc_writer-0.1.4-py2.py3-none-any.whl
Installing collected packages: pascal-voc-writer
Successfully installed pascal-voc-writer-0.1.4


In [None]:
import os

import imgaug as ia
from imgaug import augmenters as iaa
from files import *  # NOTE(review): no name from `files` appears to be used in this cell - confirm before removing
from pascal_voc_writer import Writer

# Horizontally flip every training image (with optional light blur) and write
# the augmented image plus a matching Pascal VOC annotation to output_dir.

ia.seed(1)

dir = '/content/gdrive/MyDrive/darknet/bin/darknet/trial3(dummy)/dummy/'
output_dir = "/content/gdrive/MyDrive/darknet/bin/darknet/trial3(dummy)/after_reverse_blur/"
output_prefix = 'after_reverse_'

# cv2.imwrite does not raise when the target folder is missing (it only
# returns False), so make sure the folder exists before the loop starts.
os.makedirs(output_dir, exist_ok=True)

images, annotations = read_train_dataset(dir)

# The pipeline is the same for every image, so it is built once:
# always flip left-right, and blur half of the images with sigma in [0, 0.5].
seq = iaa.Sequential([
    iaa.Fliplr(1.0),
    iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
])

for image, (boxes, annotation_file, file_name) in zip(images, annotations):
    # Each box is [label, x_min, y_min, x_max, y_max] as returned by read_anntation.
    bbs = ia.BoundingBoxesOnImage(
        [ia.BoundingBox(x1=box[1], y1=box[2], x2=box[3], y2=box[4]) for box in boxes],
        shape=image.shape,
    )

    # A fresh deterministic copy per image applies the same sampled
    # transformation to the image and to its bounding boxes.
    seq_det = seq.to_deterministic()
    image_aug = seq_det.augment_images([image])[0]
    bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]

    new_image_file = output_dir + output_prefix + file_name
    if not cv2.imwrite(new_image_file, image_aug):
        raise IOError('Could not write augmented image: ' + new_image_file)

    h, w = np.shape(image_aug)[0:2]
    voc_writer = Writer(new_image_file, w, h)

    # Augmented boxes are paired with the original boxes by position to
    # recover each box's label.
    for box, bb_box in zip(boxes, bbs_aug.bounding_boxes):
        voc_writer.addObject(box[0], int(bb_box.x1), int(bb_box.y1), int(bb_box.x2), int(bb_box.y2))

    voc_writer.save(output_dir + output_prefix + annotation_file)

In [None]:
import imgaug as ia
ia.seed(1)
# imgaug uses matplotlib backend for displaying images
%matplotlib inline
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
from imgaug import augmenters as iaa 
# imageio library will be used for image input/output
import imageio
import pandas as pd
import numpy as np
import re
import os
import glob
# this library is needed to read XML files for converting it into CSV
import xml.etree.ElementTree as ET
import shutil

In [None]:
# Helpers that turn a folder of Pascal VOC style XML files into a pandas DataFrame.
def _required_text(node, tag_path):
    """Return the text of the element at ``tag_path`` below ``node``; raise ValueError if it is absent or empty."""
    found = node.find(tag_path)
    if found is None or found.text is None:
        raise ValueError('missing <{}> value'.format(tag_path))
    return found.text


def xml_to_csv(path):
    """Collect one row per annotated object from every ``*.xml`` file in ``path``.

    Args:
        path: Folder that contains the XML annotation files.

    Returns:
        A DataFrame with the columns ``filename``, ``width``, ``height``,
        ``class``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``. Objects with a
        missing or non-integer field are skipped with a warning.
    """
    import warnings

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_list = []
    # Sorted so the row order does not depend on the file system's listing order.
    for xml_file in sorted(glob.glob(os.path.join(path, '*.xml'))):
        root = ET.parse(xml_file).getroot()
        for member in root.findall('object'):
            try:
                # Tags are looked up by name instead of by child position, so
                # files whose elements appear in a different order still parse.
                value = (
                    _required_text(root, 'filename'),
                    int(_required_text(root, 'size/width')),
                    int(_required_text(root, 'size/height')),
                    _required_text(member, 'name'),
                    int(_required_text(member, 'bndbox/xmin')),
                    int(_required_text(member, 'bndbox/ymin')),
                    int(_required_text(member, 'bndbox/xmax')),
                    int(_required_text(member, 'bndbox/ymax')),
                )
            except ValueError as error:
                # Still best-effort (the object is skipped), but no longer silent.
                warnings.warn('Skipping object in {}: {}'.format(xml_file, error))
                continue
            xml_list.append(value)
    return pd.DataFrame(xml_list, columns=column_name)
   
# Parse every XML annotation in the dummy/ dataset folder into a DataFrame (one row per annotated object).
labels_df = xml_to_csv('/content/gdrive/MyDrive/darknet/bin/darknet/trial3(dummy)/dummy')
# The tab-separated labels.txt export of this table happens in the next cell.

In [None]:
# Export the annotation table as a tab-separated text file without header row or index column.
labels_df.to_csv(
    '/content/gdrive/MyDrive/darknet/bin/darknet/trial3(dummy)/dummy/labels.txt',
    sep="\t",
    header=None,
    index=False,
)

In [None]:
# Preview the first rows of the parsed annotation table.
labels_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,10001.jpg,1920,1080,person1,1072,1,1163,190
1,10002.jpg,1920,1080,person1,1074,1,1156,188
2,10003.jpg,1920,1080,person1,982,31,1011,192
3,10004.jpg,1920,1080,person1,874,90,933,310
4,10005.jpg,1920,1080,person1,715,147,828,409


In [None]:
# Horizontal box centre as a fraction of the image width: midpoint of xmin/xmax divided by width.
labels_df['center_x'] = (
    (labels_df['xmin'] + labels_df['xmax']).div(2).div(labels_df['width'])
)

In [None]:
# Vertical box centre as a fraction of the image height: midpoint of ymin/ymax divided by height.
labels_df['center_y'] = (
    (labels_df['ymin'] + labels_df['ymax']).div(2).div(labels_df['height'])
)

In [None]:
# Box width as a fraction of the image width.
labels_df['w'] = (labels_df['xmax'] - labels_df['xmin']).div(labels_df['width'])

In [None]:
# Box height as a fraction of the image height, then preview the table with the new columns.
labels_df['h'] = (labels_df['ymax'] - labels_df['ymin']).div(labels_df['height'])
labels_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax,center_x,center_y,w,h
0,10001.jpg,1920,1080,person1,1072,1,1163,190,0.582031,0.088426,0.047396,0.175
1,10002.jpg,1920,1080,person1,1074,1,1156,188,0.580729,0.0875,0.042708,0.173148
2,10003.jpg,1920,1080,person1,982,31,1011,192,0.51901,0.103241,0.015104,0.149074
3,10004.jpg,1920,1080,person1,874,90,933,310,0.470573,0.185185,0.030729,0.203704
4,10005.jpg,1920,1080,person1,715,147,828,409,0.401823,0.257407,0.058854,0.242593


In [None]:
# Write one darknet-style label file per image into txt_file/, one line per
# object: "<class_id> <center_x> <center_y> <w> <h>".
# Class id is 0 for 'person1' and 1 for every other class name.
txt_dir = '/content/gdrive/MyDrive/darknet/bin/darknet/trial3(dummy)/dummy/txt_file'
os.makedirs(txt_dir, exist_ok=True)

# Group rows by image name without its extension; splitext only strips the
# final extension, so dots inside the name are preserved.
stems = labels_df['filename'].map(lambda name: os.path.splitext(name)[0])
label_columns = ['class', 'center_x', 'center_y', 'w', 'h']

for stem, group in labels_df.groupby(stems, sort=False):
    # Opened with 'w' (not 'a') so re-running this cell rewrites each file
    # instead of appending duplicate label lines.
    with open(os.path.join(txt_dir, '{}.txt'.format(stem)), 'w') as f:
        for class_name, center_x, center_y, box_w, box_h in group[label_columns].itertuples(index=False, name=None):
            class_id = 0 if class_name == 'person1' else 1
            # Every line ends with a newline so several objects of the same
            # image never run together on one line.
            f.write("{} {} {} {} {}\n".format(class_id, center_x, center_y, box_w, box_h))