# [IAPR 2019:][iapr2019] Special project

**Group members:**
    1- first name and last name,
    2- first name and last name,
    3- first name and last name

**Due date:** 30.05.2019

[iapr2019]: https://github.com/LTS5/iapr-2019


## Description
Please find the description of this special project via [this link].

[this link]: https://github.com/LTS5/iapr-2019/blob/master/project/special_project_description.pdf

## Part 1: Finding varroas by segmentation
Add your implementation for the ''**detect_by_segmentation**'' function. Please make sure the input and output follow the mentioned format.

In [4]:
import numpy as np
import skimage
from skimage import morphology
import matplotlib.pyplot as plt
import pandas as pd

In [37]:
def detect_by_segmentation(img):
    '''
    Detect varroas by thresholding dark pixels and boxing each connected region.

    Input: One single image (passed to skimage.color.rgb2gray)
    Output: A list containing coordinates of all detected varroas, with the following format:
            [[x_1, y_1, w_1, h_1], [x_2, y_2, w_2, h_2], ..., [x_n, y_n, w_n, h_n]]
            where ''n'' is the number of detected varroas.
    '''
    gray = skimage.color.rgb2gray(img)
    # Keep only the pixels whose grayscale value is below 0.3.
    dark_mask = gray < 0.3
    # Opening with a 6x6 structuring element removes small areas.
    cleaned_mask = morphology.binary_opening(dark_mask, np.ones((6, 6)))
    regions = skimage.measure.regionprops(skimage.measure.label(cleaned_mask))
    # regionprops reports bbox as (min_row, min_col, max_row, max_col);
    # reorder it into [x, y, width, height].
    return [
        [min_col, min_row, max_col - min_col, max_row - min_row]
        for min_row, min_col, max_row, max_col in (region['bbox'] for region in regions)
    ]

Add your implementation. Report the Precision, Recall and F1-score, by using all 50 images of the test-set, and considering 0.3 as the IoU threshold.

In [38]:
#Your code
import xml.etree.ElementTree as ET

def parse_file(filename):
    """Parse a PASCAL VOC xml file.

    Returns a list with one dict per ``object`` element, holding:
      - 'name': text of the ``name`` child
      - 'bbox': [x, y, width, height] derived from the ``bndbox`` child,
                with each coordinate truncated to an int before subtracting.
    """
    def _coord(box_node, tag):
        # Coordinates may be written as floats in the xml; truncate to int.
        return int(float(box_node.find(tag).text))

    annotations = []
    for node in ET.parse(filename).findall('object'):
        label = node.find('name').text
        box_node = node.find('bndbox')
        x_min = _coord(box_node, 'xmin')
        y_min = _coord(box_node, 'ymin')
        x_max = _coord(box_node, 'xmax')
        y_max = _coord(box_node, 'ymax')
        annotations.append({
            'name': label,
            'bbox': [x_min, y_min, x_max - x_min, y_max - y_min],
        })
    return annotations

In [39]:
def iter_over_union(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Both boxes are indexed as (x_min, y_min, x_max, y_max). Widths and
    heights are computed as ``max - min + 1``.
    """
    # Extent of the overlap along each axis; negative means no overlap.
    overlap_w = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]) + 1
    overlap_h = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]) + 1
    intersection = max(0, overlap_w) * max(0, overlap_h)

    area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

    union = float(area_a + area_b - intersection)
    return intersection / union

def cal_iou(pred_bboxes, gt_bboxes):
    """Pair every predicted box with the ground-truth box it overlaps most.

    Both inputs are sequences of boxes indexed as (x, y, width, height).

    Returns ``(ious, gt_candidate_set)``:
      - ious: for each prediction, its largest IoU over all ground-truth
        boxes (0 when no ground-truth box overlaps it).
      - gt_candidate_set: for each prediction, the pair
        (ground-truth index, prediction index); the ground-truth index is
        -1 when the prediction overlaps no ground-truth box.
    """
    # Convert ground truth from (x, y, width, height) to (x_min, y_min, x_max, y_max).
    gt_corners = [[g[0], g[1], g[0] + g[2], g[1] + g[3]] for g in gt_bboxes]

    best_ious = []
    pairs = []
    for pred_idx, box in enumerate(pred_bboxes):
        # Same corner conversion for the predicted box.
        pred_corners = (box[0], box[1], box[0] + box[2], box[1] + box[3])
        best_iou, best_gt = 0, -1  # -1: no ground truth matched so far
        # Brute-force search; only a strictly larger IoU replaces the current best.
        for gt_idx, corners in enumerate(gt_corners):
            candidate = iter_over_union(corners, pred_corners)
            if candidate > best_iou:
                best_iou, best_gt = candidate, gt_idx
        pairs.append((best_gt, pred_idx))
        best_ious.append(best_iou)
    return best_ious, pairs

def precision_recall_f1(T, ious,  gt_candidate_set, num_gt_bbox):
    """Compute precision, recall and F1 for one image.

    Parameters
      T: IoU threshold; a prediction can match a ground-truth box only if
         its IoU is strictly greater than T.
      ious: for each prediction, its best IoU against the ground truth.
      gt_candidate_set: for each prediction, a pair
         (ground-truth index, prediction index); a ground-truth index of -1
         means the prediction corresponds to no ground-truth box.
      num_gt_bbox: number of ground-truth boxes.

    Returns
      (precision, recall, f1). Each value is 0.0 when it is undefined
      (no true positive).

    A ground-truth box is counted as detected at most once: when several
    predictions hit the same box, one is a true positive and the others are
    false positives. This keeps tp + fn equal to num_gt_bbox, so recall is
    the fraction of ground-truth boxes that were found.
    """
    # Ground-truth boxes hit by at least one prediction above the threshold.
    matched_gt = set()
    for iou, (gt_idx, _pred_idx) in zip(ious, gt_candidate_set):
        if gt_idx != -1 and iou > T:
            matched_gt.add(gt_idx)

    tp = len(matched_gt)      # each detected ground-truth box counts once
    fp = len(ious) - tp       # unmatched, below-threshold and duplicate predictions
    fn = num_gt_bbox - tp     # ground-truth boxes that no prediction detected

    precision = tp / (tp + fp) if tp != 0 else 0.0
    recall = tp / (tp + fn) if tp != 0 else 0.0
    if precision * recall != 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    return precision, recall, f1

def evaluation(T, pred_bboxes, gt_bboxes):
    """Score the predicted boxes of one image against its ground truth.

    Pairs predictions with ground-truth boxes via cal_iou, then returns the
    (precision, recall, f1) tuple computed by precision_recall_f1 with
    threshold T.
    """
    best_ious, pairs = cal_iou(pred_bboxes, gt_bboxes)
    return precision_recall_f1(T, best_ious, pairs, len(gt_bboxes))

In [49]:
# Run the segmentation detector on every image listed in test.txt and report
# per-image scores followed by their mean over all images.
T = 0.3  # IoU threshold handed to evaluation()
file_names = pd.read_csv('./project-data/test.txt', header=None)[0].tolist()
precisions, recalls, f1s = [], [], []
for n in file_names:
    img = skimage.io.imread('./project-data/images/test/' + n + '.jpg')
    # Keep only the [x, y, w, h] box of each annotated object.
    gt_bboxes = [
        gb['bbox']
        for gb in parse_file('./project-data/annotations/test/' + n + '.xml')
    ]
    pred_bboxes = detect_by_segmentation(img)
    precision, recall, f1 = evaluation(T, pred_bboxes, gt_bboxes)
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)
    print('Precision: %.2f Recall: %.2f, F1: %.2f' % (precision, recall, f1))
print(
    'Average Precision: %.2f Average Recall: %.2f, Average F1: %.2f'
    % (np.mean(precisions), np.mean(recalls), np.mean(f1s))
)

Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.16 Recall: 0.83, F1: 0.26
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.03 Recall: 0.65, F1: 0.06
Precision: 0.01 Recall: 1.00, F1: 0.01
Precision: 0.04 Recall: 1.00, F1: 0.08
Precision: 0.27 Recall: 0.40, F1: 0.32
Precision: 0.04 Recall: 1.00, F1: 0.08
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.11 Recall: 0.90, F1: 0.19
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.03 Recall: 1.00, F1: 0.05
Precision: 0.05 Recall: 0.50, F1: 0.09
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.06 Recall: 1.00, F1: 0.10
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.01 Recall: 0.75, F1: 0.02
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.02 Recall: 0.88, F1: 0.03
Precision: 0.00 Recall: 0.00, F1: 0.00
Precision: 0.09 Recall: 0

## Part 2: Implement your first detector

Write your function(s) for the second part. Feel free to change the name of the function and add your additional functions, but please make sure their input and output follow the mentioned format.

In [3]:
def detect_by_method_1(img):
    '''
    Placeholder for the first custom detector (not implemented yet).

    Input: One single image
    Output: A numpy array containing coordinates of all detected varroas, with the following format: 
            [[x_1, y_1, w_1, h_1], [x_2, y_2, w_2, h_2], ..., [x_n, y_n, w_n, h_n]] 
            where ''n'' is the number of detected varroas.

    NOTE: the body currently holds only this docstring, so calling the function returns None.
    '''

    #Your code

Add your implementation. Report the Precision, Recall and F1-score, by using all 50 images of the test-set, and considering 0.3 as the IoU threshold.

In [4]:
#Your code

## Part 3: Using MLP and CNNs

Add your implementation for the third part. Feel free to add your desirable functions, but please make sure you have proper functions for the final detection, where their input and output follow the same format as the previous parts.

In [5]:
#Your code

## Challenge

You can generate a json submission file by using the function ''**generate_pred_json**''. This prediction file can be uploaded online for evaluation (Please refer to section 3 of the project description for more details).

In [6]:
import numpy as np
import json

def generate_pred_json(data, tag='baseline'):
    '''
    Input
    - data: Is a dictionary d, such that:
          d = { 
              "ID_1": [], 
              "ID_2": [[x_21, y_21, w_21, h_21], [x_22, y_22, w_22, h_22]], 
              ... 
              "ID_i": [[x_i1, y_i1, w_i1, h_i1], ..., [x_iJ, y_iJ, w_iJ, h_iJ]],
              ... 
              "ID_N": [[x_N1, y_N1, w_N1, h_N1]],
          }
          where ID is the string id of the image (e.g. 5a05e86fa07d56baef59b1cb_32.00px_1) and the value the Kx4 
          array of integers for the K predicted bounding boxes (e.g. [[170, 120, 15, 15]])
    - tag: (optional) string that will be added to the name of the json file.
    Output
      Create a json file, "prediction_[tag].json", containing the prediction in EvalAI format.

    Entries are left out of the file when the key is not a string or when the
    value cannot be converted to a 2d integer array with 4 columns (this
    includes empty predictions such as []). The input dictionary is not modified.
    '''
    valid = {}
    for key, value in data.items():
        # Id must be a string
        if not isinstance(key, str):
            continue
        try:
            # Convert to a numpy array and round to the closest int
            boxes = np.around(np.array(value)).astype(int)
        except (ValueError, TypeError):
            # Inconsistent row sizes or non-numeric content
            continue
        # Must be a 2d array with 4 columns (x, y, w, h)
        if boxes.ndim != 2 or boxes.shape[1] != 4:
            continue
        valid[key] = boxes.tolist()

    with open('prediction_{}.json'.format(tag), 'w') as outfile:
        json.dump(valid, outfile)

In [7]:
#Your code