**Import**

In [1]:
import ast
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
%matplotlib inline

**Utils**

In [2]:
def plot_image(img):
    """Show an image file in a new 20x20-inch matplotlib figure.

    Args:
        img: Image source handed straight to ``plt.imread``.
    """
    plt.figure(figsize=(20, 20))
    pixels = plt.imread(img)
    plt.imshow(pixels)

def visualize_detection(image_path, bbox):
    """Display an image with its bounding boxes outlined.

    Args:
        image_path: Image source read with ``plt.imread``.
        bbox: Sequence of boxes, each ordered ``[y1, x1, y2, x2]``
            (top-left corner followed by bottom-right corner).

    Returns:
        None. The annotated image is shown in a new 20x10-inch figure.
    """
    plt.figure(figsize=(20, 10))
    # Draw on a private, writable copy: cv2.rectangle modifies its input in
    # place, and the array handed back by plt.imread may be read-only.
    image_np = np.array(plt.imread(image_path))
    for box in bbox:
        # Cast so coordinates stored as NumPy scalars or floats are accepted too.
        y1, x1, y2, x2 = (int(v) for v in box[:4])
        # OpenCV points are given as (x, y).
        cv2.rectangle(image_np, (x1, y1), (x2, y2), (255, 0, 0), 2)
    plt.imshow(image_np)

**Data Path**

In [3]:
# Root directory of the dataset; every other location is derived from it.
data_path = '/kaggle/input/tensorflow-great-barrier-reef/'
# Folder holding the training images.
train_image_path = os.path.join(data_path, 'train_images')
# CSV file with the training annotations.
train_data_path = os.path.join(data_path, 'train.csv')

**Load and Preview the Training Data**

In [4]:
# Load the annotation table from train.csv and preview its first ten rows.
train_data = pd.read_csv(train_data_path)
train_data.head(10)

**Get the Data**

In [5]:
# Re-read the CSV so this cell always starts from the full table: the
# statements at the end of the cell replace train_data with a two-column frame.
train_data = pd.read_csv(train_data_path)

def define_image_path(x, image_root=None):
    """Build the path of an image file from its ``image_id``.

    Args:
        x: Image id string of the form ``"<video>-<frame>"`` (e.g. ``"0-16"``).
        image_root: Directory that contains the ``video_<video>`` folders.
            Defaults to the notebook-level ``train_image_path``, looked up
            when the function is called.

    Returns:
        str: ``<image_root>/video_<video>/<frame>.jpg``.

    Raises:
        IndexError: If ``x`` contains no ``"-"`` separator.
    """
    if image_root is None:
        image_root = train_image_path
    # Split once; any parts after the second are ignored.
    parts = x.split("-")
    return os.path.join(image_root, "video_" + parts[0], parts[1] + ".jpg")

def get_image_dict(train_data):
    """Map every image path to its list of bounding boxes.

    Args:
        train_data: DataFrame with an ``image_path`` column and an
            ``annotations`` column. Each annotation is the string form of a
            list of dicts with ``x``, ``y``, ``width`` and ``height`` keys,
            e.g. ``"[{'x': 559, 'y': 213, 'width': 50, 'height': 32}]"``.
            ``"[]"`` (or an empty string) means the image has no boxes.

    Returns:
        dict: ``{image_path: [[y1, x1, y2, x2], ...]}`` with integer corner
        coordinates; images without annotations map to an empty list.
    """
    image_dict = {}
    for image_path, annotations in zip(train_data["image_path"], train_data["annotations"]):
        # literal_eval parses the Python-literal string without executing code.
        # Reading each value by key keeps the result independent of key order
        # and of the exact spacing inside the string.
        parsed = ast.literal_eval(annotations) if annotations.strip() else []
        boxes = []
        for ann in parsed:
            x1 = int(ann["x"])
            y1 = int(ann["y"])
            # Convert (x, y, width, height) into corner form [y1, x1, y2, x2].
            boxes.append([y1, x1, y1 + int(ann["height"]), x1 + int(ann["width"])])
        image_dict[image_path] = boxes
    return image_dict
    
# Resolve every image_id to the path of its image file.
train_data['image_path'] = train_data['image_id'].apply(define_image_path)
# Keep only the two columns that get_image_dict reads.
train_data = train_data[['image_path', 'annotations']]
# Lookup table: image path -> list of [y1, x1, y2, x2] boxes.
image_dict = get_image_dict(train_data)

In [38]:
# One row per image: the key of image_dict (an image path) and its box count.
data = [[key, len(val)] for key, val in image_dict.items()]
# NOTE: the 'image_id' column holds the image path used as key in image_dict;
# the name is kept because the cells below index the frame with it.
df = pd.DataFrame(data=data, columns=['image_id', 'no_annotations'])
print("Total number of training examples: ", len(df))

# Number of images for each distinct annotation count.
grp = df.groupby("no_annotations", as_index=False)["image_id"].count()
plt.figure(figsize=(10,6))
# The counts are converted to strings so each one gets its own categorical bar.
# The builtin `str` replaces `np.str`, an alias that was removed in NumPy 1.24.
plt.bar(np.array(grp["no_annotations"].values, dtype=str), grp["image_id"])
plt.xlabel("number of annotations")
plt.ylabel("number of examples")
plt.title("Images per annotation count")

# images with annotation
df_annotated = df[df["no_annotations"] > 0]
print("Image with some annotation: ", len(df_annotated))
# The first annotated image is read to report the array shape of an image.
print("Image size: ", plt.imread(df_annotated["image_id"].values[0]).shape)

In [48]:
# image with 1 detection
# Show one example image for each of these annotation counts: 1, 2, 5, 10 and 18.
for n_detections in (1, 2, 5, 10, 18):
    # First image in df that has exactly this many annotations.
    matching_paths = df[df["no_annotations"] == n_detections]["image_id"].values
    img = matching_paths[0]
    visualize_detection(img, image_dict[img])