# Example 1 - ROI detection

In this notebook, we'll perform ROI detection on echograms, and evaluate its performance using annotations. 

In [1]:
import os
import numpy as np
import pandas as pd
import glob
import pickle

from src.read_echogram import EchogramReader
from src.detect_ROI import ROIDetector
from src.ROI_features import FeatureExtractor
from src.transform_annotations import AnnotationTransformer
from src.match_annotations import OverlapAnnotation
from src.crop_ROI import ROICropper

%matplotlib inline

## Step 1. Load annotations

In this step, we'll load original and transformed annotations, i.e., filename: [annotations, labels], saved as pkl file. 

In [2]:
# load original annotations (mask)
annotations_dir = "../csv/"
annotations = pd.read_csv(annotations_dir + "annotation_df_masks.csv")
# drop nan
annotations = annotations.dropna(how='any')
# add label map
label_map = {'Unclassified regions': 1, 'krill_schools': 2, 'fish_school': 3, 'AH_School': 4}

In [3]:
# select echograms with AH schools
annotations_sel = annotations[annotations['label'] == 'AH_School']
filename_li = annotations_sel['file_dir'].unique()
filename_li.sort()

In [4]:
len(filename_li)

254

In [3]:
# load transformed annotations (query)
pkl_dir = "pkl/"
with open(pkl_dir + 'annotations_dict_new.pickle', 'rb') as handle:
    annotations_dict = pickle.load(handle)

## Step 2. Parameter study (overlap ratio)

In this step, we will test out how threshold and kernel_size impact recall, precision, and IoU in ROI detection. There are 1710 echograms in 2019. 

In [6]:
# add raw and bottom file dir
raw_dir = "../data/HB1906_EK60/rawfiles/"
bot_dir = "../data/HB1906_EK60/botfiles/"
freq_li = [18, 38, 120, 200]

In [7]:
raw_paths = sorted(glob.glob(raw_dir + '*.raw'))
bot_paths = sorted(glob.glob(bot_dir + '*.bot'))

In [8]:
fig_dir = "figures/"

First, let's test out overlap ratio & recall/precision/F1. All with -66dB threshold.

In [6]:
threshold = -66
kernel_size = 3
overlap_ratio_li = [0.0, 0.2, 0.4]

In [10]:
def test_overlap(a):
    i, j = a
    if i not in filename_li:
        return
    echogram = EchogramReader(i, j, freq_li)
    filename, Sv_npy, surface_idx, bottom_idx, time, depth, positions = echogram()   
    annotations_idx, labels = annotations_dict[filename] # get annotation xy indices directly
    # detect ROIs
    roi = ROIDetector(filename, Sv_npy, surface_idx, bottom_idx, fig_dir, threshold, kernel_size)
    img_shape, contours = roi()
    features = FeatureExtractor(filename, contours, Sv_npy, bottom_idx, time, depth, positions)                    
    contours_sel, contours_features = features() 
    temp_res = []
    for overlap_ratio in overlap_ratio_li:
        try:
            # match ROIs with annotations, bug: with only 1 annotations - D20191111-T080459
            overlap = OverlapAnnotation(filename, img_shape, annotations_idx, labels, contours_sel, fig_dir) 
            # object-level metrics
            counts = overlap.object_overlap_count(overlap_ratio) # set threshold   
            res_dict = {'filename': filename, 'threshold': threshold, 'kernel_size': kernel_size, 'overlap_ratio': overlap_ratio, 'annotations_valid': counts[0], 'annotations_all': counts[1], 'roi_valid': counts[2], 'roi_all': counts[3]}
            temp_res.append(res_dict)
        except:
            continue
    return temp_res     

Using multiprocessing, about 1 hour. Exactly 254 files. 

In [None]:
from multiprocessing import Pool
pool = Pool(os.cpu_count())
res_li = pool.map(test_overlap, zip(raw_paths, bot_paths)) # a list of results

In [12]:
res = []
for sublist in res_li:
    if sublist == None:
        continue
    for item in sublist:
        res.append(item)
df_overlap_ratio = pd.DataFrame(res)

In [13]:
pkl_dir = "pkl/"
df_overlap_ratio.to_pickle(pkl_dir + 'df_overlap_ratio_or.pkl')

In [14]:
pool.close()
pool.join()

Get recall/precision/F1 at different overlap ratio. 

In [7]:
pkl_dir = "pkl/"
df_overlap_ratio = pd.read_pickle(pkl_dir + 'df_overlap_ratio_or.pkl')

In [8]:
for overlap_ratio in overlap_ratio_li:
    df = df_overlap_ratio[df_overlap_ratio['overlap_ratio']==overlap_ratio]
    recall = df['annotations_valid'].sum() / df['annotations_all'].sum()
    precision = df['roi_valid'].sum() / df['roi_all'].sum()
    F1 = 1.0/(1.0/recall + 1.0/precision)
    print(f'overlap ratio: {overlap_ratio}, recall: {recall}, precision: {precision}, F1: {F1}')

overlap ratio: 0.0, recall: 0.9529691211401425, precision: 0.19820891345413838, F1: 0.16408146123384276
overlap ratio: 0.2, recall: 0.9439429928741092, precision: 0.1924018750437277, F1: 0.15982507325980058
overlap ratio: 0.4, recall: 0.9135391923990499, precision: 0.17770936822220668, F1: 0.14876947249766456


## Step 4. Check depth issue

The original echogram's depth starts at 7.5m, while annotations start at 6m, add 1.5m as offset. 

In [35]:
print(depth) # depth varies, 6m or 7.5m, annotations, buffer 6m, add offset

[  6.           6.19132538   6.38265076 ... 505.35924545 505.55057083
 505.74189621]
