# This notebook uses k-means clustering with an IoU-based distance to derive the anchor-box aspect ratios that give the highest average IoU.

## Define the core method

Import core packages

In [1]:
import numpy as np

### Method of IoU calculation

In [3]:
def iou(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.

    The box and every cluster are compared by width and height only, i.e. as if
    they all shared one corner at the origin.
    :param box: tuple or array of (width, height), i.e. a box shifted to the origin
    :param clusters: array-like of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k,) with the IoU between the box and each cluster
    :raises ValueError: if the box or any cluster has a width or height of zero
    """
    # Accept plain nested lists/tuples as well as numpy arrays.
    clusters = np.asarray(clusters)

    # With a shared corner, the overlap is the smaller extent along each axis.
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.any(x == 0) or np.any(y == 0):
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    # union = area(box) + area(cluster) - intersection
    iou_ = intersection / (box_area + cluster_area - intersection)

    return iou_

def avg_iou(boxes, clusters):
    """
    Computes the mean, over all boxes, of each box's best IoU against the k clusters.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average of the per-box maximum IoU as a single float
    """
    best_overlaps = []
    for index in range(boxes.shape[0]):
        # IoU of this box against every cluster; keep only the best match.
        overlaps = iou(boxes[index], clusters)
        best_overlaps.append(np.max(overlaps))
    return np.mean(best_overlaps)


def translate_boxes(boxes):
    """
    Shifts all the boxes to the origin, leaving only their extents.

    Columns 2 and 3 are replaced by the absolute differences |col2 - col0| and
    |col3 - col1|, then columns 0 and 1 are dropped. The input is not modified.
    :param boxes: numpy array of shape (r, 4)
    :return: numpy array of shape (r, 2)
    """
    shifted = boxes.copy()
    # Work on whole columns at once instead of visiting each row.
    shifted[:, 2] = np.abs(shifted[:, 2] - shifted[:, 0])
    shifted[:, 3] = np.abs(shifted[:, 3] - shifted[:, 1])
    # Only the two extent columns are returned.
    return np.delete(shifted, [0, 1], axis=1)


def kmeans(boxes, k, dist=np.median, seed=None):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.

    The distance between a box and a centroid is ``1 - IoU``. Iteration stops when
    the box-to-cluster assignment no longer changes between two passes.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: function that reduces the boxes of one cluster to its new centroid;
                 it is called as ``dist(members, axis=0)``
    :param seed: optional value passed to ``np.random.seed``; the default ``None``
                 reseeds the global generator unpredictably, as before
    :return: numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    # -1 is never a valid cluster index, so the first assignment can not be
    # mistaken for convergence (an all-zero start could match a pass that puts
    # every box into cluster 0).
    last_clusters = np.full((rows,), -1)

    np.random.seed(seed)

    # Forgy initialisation: k distinct rows become the starting centroids.
    # Distinct rows may still hold identical values, which leaves some clusters
    # without members; that case is handled in the update step below.
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row, box in enumerate(boxes):
            distances[row] = 1 - iou(box, clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            members = boxes[nearest_clusters == cluster]
            # Reducing an empty selection yields NaN, which would poison every
            # later IoU computation; keep the previous centroid instead.
            if len(members) > 0:
                clusters[cluster] = dist(members, axis=0)

        last_clusters = nearest_clusters

    return clusters

## Write the analysis code

Import packages

In [14]:
import glob
import xml.etree.ElementTree as ET
import cv2

### Define the function that loads the XML annotations

In [19]:
def load_dataset(path):
    """
    Loads the normalised box sizes from every XML annotation file in a directory.

    Each ``<name>.xml`` is paired with ``<name>.jpg`` in the same location; the
    image provides the width and height used to normalise the boxes. When the
    image cannot be read, the ``size/width`` and ``size/height`` values stored in
    the XML are used instead.
    :param path: directory that contains the ``*.xml`` annotation files
    :return: numpy array with one ``[width, height]`` row per annotated object,
             each value divided by the image width / height
    :raises ValueError: if no usable image size is available for an annotation
    """
    dataset = []
    paths = [p.replace("\\", '/') for p in glob.glob("{}/*.xml".format(path))]
    print("Get %d xmls" % len(paths))
    for xml_file in paths:
        tree = ET.parse(xml_file)
        # Swap only the trailing extension; str.replace would also rewrite any
        # ".xml" that happens to occur earlier in the path.
        img_file = xml_file.rsplit(".", 1)[0] + ".jpg"

        # cv2.imread signals a missing/unreadable file by returning None.
        image = cv2.imread(img_file, 0)
        if image is not None:
            height, width = image.shape
        else:
            xml_height = tree.findtext("./size/height")
            xml_width = tree.findtext("./size/width")
            if xml_height is None or xml_width is None:
                raise ValueError(
                    "Cannot read image {} and {} has no size information".format(
                        img_file, xml_file))
            height, width = int(xml_height), int(xml_width)

        if height <= 0 or width <= 0:
            raise ValueError(
                "Invalid image size {}x{} for {}".format(width, height, xml_file))

        for obj in tree.iter("object"):
            xmin = int(obj.findtext("bndbox/xmin")) / width
            ymin = int(obj.findtext("bndbox/ymin")) / height
            xmax = int(obj.findtext("bndbox/xmax")) / width
            ymax = int(obj.findtext("bndbox/ymax")) / height

            dataset.append([xmax - xmin, ymax - ymin])

    return np.array(dataset)

In [None]:
# Directory that load_dataset() scans for "*.xml" annotation files.
# NOTE(review): the value looks like a placeholder — point it at the real training folder before running.
ANNOTATIONS_PATH = "path/to/train"

### Number of clusters: 3

In [20]:
# Cluster the normalised box sizes into CLUSTERS centroids and report the result.
CLUSTERS = 3
data = load_dataset(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)

# Average of each box's best IoU against the centroids, shown as a percentage.
mean_best_iou = avg_iou(data, out)
print("Accuracy: {:.2f}%".format(mean_best_iou * 100))
print("Boxes:\n {}".format(out))

# Width / height ratio of every centroid, rounded to two decimals.
anchor_widths, anchor_heights = out[:, 0], out[:, 1]
ratios = np.around(anchor_widths / anchor_heights, decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))

Get 635 xmls
Accuracy: 78.03%
Boxes:
 [[0.51944444 0.4453125 ]
 [0.93240741 0.40026042]
 [0.78796296 0.77152778]]
Ratios:
 [1.02, 1.17, 2.33]


### Number of clusters: 5

In [21]:
# Cluster the normalised box sizes into CLUSTERS centroids and report the result.
CLUSTERS = 5
data = load_dataset(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)

# Average of each box's best IoU against the centroids, shown as a percentage.
mean_best_iou = avg_iou(data, out)
print("Accuracy: {:.2f}%".format(mean_best_iou * 100))
print("Boxes:\n {}".format(out))

# Width / height ratio of every centroid, rounded to two decimals.
anchor_widths, anchor_heights = out[:, 0], out[:, 1]
ratios = np.around(anchor_widths / anchor_heights, decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))

Get 635 xmls
Accuracy: 82.22%
Boxes:
 [[0.91759259 0.36619792]
 [0.66203704 0.59314693]
 [0.82037037 0.8140625 ]
 [0.95       0.48776042]
 [0.49166667 0.4203125 ]]
Ratios:
 [1.01, 1.12, 1.17, 1.95, 2.51]


### Number of clusters: 7

In [22]:
# Cluster the normalised box sizes into CLUSTERS centroids and report the result.
CLUSTERS = 7
data = load_dataset(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)

# Average of each box's best IoU against the centroids, shown as a percentage.
mean_best_iou = avg_iou(data, out)
print("Accuracy: {:.2f}%".format(mean_best_iou * 100))
print("Boxes:\n {}".format(out))

# Width / height ratio of every centroid, rounded to two decimals.
anchor_widths, anchor_heights = out[:, 0], out[:, 1]
ratios = np.around(anchor_widths / anchor_heights, decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))

Get 635 xmls
Accuracy: 84.41%
Boxes:
 [[0.47546296 0.4171875 ]
 [0.82314815 0.67135417]
 [0.91203704 0.87207602]
 [0.63287037 0.54162809]
 [0.97361111 0.48368056]
 [0.71196755 0.80555556]
 [0.91666667 0.36614583]]
Ratios:
 [0.88, 1.05, 1.14, 1.17, 1.23, 2.01, 2.5]


### Number of clusters: 9

In [23]:
# Cluster the normalised box sizes into CLUSTERS centroids and report the result.
CLUSTERS = 9
data = load_dataset(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)

# Average of each box's best IoU against the centroids, shown as a percentage.
mean_best_iou = avg_iou(data, out)
print("Accuracy: {:.2f}%".format(mean_best_iou * 100))
print("Boxes:\n {}".format(out))

# Width / height ratio of every centroid, rounded to two decimals.
anchor_widths, anchor_heights = out[:, 0], out[:, 1]
ratios = np.around(anchor_widths / anchor_heights, decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))

Get 635 xmls
Accuracy: 85.67%
Boxes:
 [[0.44074074 0.40104167]
 [0.85740741 0.68385417]
 [0.92685185 0.87986111]
 [0.91712963 0.36619792]
 [0.68461538 0.61875   ]
 [0.73101852 0.82357625]
 [0.59305556 0.50651042]
 [0.54490741 0.42161458]
 [0.97361111 0.48368056]]
Ratios:
 [0.89, 1.05, 1.1, 1.11, 1.17, 1.25, 1.29, 2.01, 2.5]
