# Example 0 - Transform annotations

In this notebook, for each echogram that has annotations, we transform its annotations and store them in a dictionary entry of the form `filename: [annotations, labels]`.

In [1]:
import os
import numpy as np
import pandas as pd
import glob
import pickle

from src.read_echogram import EchogramReader
from src.detect_ROI import ROIDetector
from src.ROI_features import FeatureExtractor
from src.transform_annotations import AnnotationTransformer
from src.match_annotations import OverlapAnnotation
from src.crop_ROI import ROICropper

%matplotlib inline

## Step 1. Load annotations

In this step, we'll load the annotations for the 2019 echograms (1710 echograms in total) and take a first look at them.

In [26]:
# Read the mask annotations table and discard every row that contains a NaN.
annotations_dir = "../csv/"
annotations = pd.read_csv(annotations_dir + "annotation_df_masks.csv").dropna(how='any')

# Name -> integer label lookup, handed to AnnotationTransformer later on.
label_map = {
    'Unclassified regions': 1,
    'krill_schools': 2,
    'fish_school': 3,
    'AH_School': 4,
}

In [27]:
# Preview the 'file_dir' column: the file path recorded on each annotation row.
print(annotations['file_dir'])

0        ../data/HB1906_EK60/rawfiles/D20190925-T220811...
1        ../data/HB1906_EK60/rawfiles/D20190925-T204502...
2        ../data/HB1906_EK60/rawfiles/D20190925-T135327...
3        ../data/HB1906_EK60/rawfiles/D20190925-T171712...
4        ../data/HB1906_EK60/rawfiles/D20190925-T175846...
                               ...                        
92770    ../data/HB1906_EK60/rawfiles/D20191114-T074340...
92771    ../data/HB1906_EK60/rawfiles/D20191114-T094630...
92772    ../data/HB1906_EK60/rawfiles/D20191114-T074340...
92773    ../data/HB1906_EK60/rawfiles/D20191114-T123017...
92774    ../data/HB1906_EK60/rawfiles/D20191114-T102727...
Name: file_dir, Length: 91877, dtype: object


In [28]:
# Distinct file paths that carry at least one annotation, in sorted order.
filename_li = np.sort(annotations['file_dir'].unique())
print(len(filename_li))

861


## Step 2. Transform annotations and save

In this step, we'll load the echograms that have annotations, transform their annotations into xy indices, and save the result as a dict.
Estimated run time: about 24 hours as originally written; about 12 hours after switching to multiprocessing.

In [29]:
# add raw and bottom file dir
raw_dir = "../data/HB1906_EK60/rawfiles/"
bot_dir = "../data/HB1906_EK60/botfiles/"
freq_li = [18, 38, 120, 200]

In [30]:
# Collect the raw and bottom files in lexicographic order. They are later
# paired by position with zip(), which assumes the two listings line up
# one-to-one -- TODO confirm.
raw_paths = glob.glob(raw_dir + '*.raw')
raw_paths.sort()
bot_paths = glob.glob(bot_dir + '*.bot')
bot_paths.sort()

In [32]:
def transform_annotations(a):
    """Convert the annotations of one echogram into index form.

    Parameters
    ----------
    a : sequence
        ``a[0]`` is the raw-file path and ``a[1]`` the bottom-file path,
        as produced by ``zip(raw_paths, bot_paths)``.

    Returns
    -------
    dict or None
        ``{filename: [annotations_idx, labels]}`` for the echogram, or
        ``None`` when the raw-file path is not present in ``filename_li``.
    """
    raw_path = a[0]
    bot_path = a[1]
    if raw_path in filename_li:
        reader = EchogramReader(raw_path, bot_path, freq_li)
        # Of the seven values the reader yields, only the filename, time and
        # depth are needed here; the remaining ones are discarded.
        filename, _, _, _, time, depth, _ = reader()
        # Building/calling the transformer is the very time-consuming step.
        to_indices = AnnotationTransformer(annotations, filename, depth, time, label_map)
        annotations_idx, labels = to_indices()
        return {filename: [annotations_idx, labels]}
    # Raw file without annotations: nothing to transform.
    return None

In [None]:
from multiprocessing import Pool

# Run transform_annotations over every (raw, bottom) path pair in parallel,
# using one worker process per CPU reported by os.cpu_count().
# zip() pairs the two lists by position and stops at the shorter one, so this
# assumes raw_paths and bot_paths line up one-to-one -- TODO confirm.
# The `with` block tears the worker processes down even if a task raises.
# map() blocks until every result has been collected, so no work is cut short.
# `pool` stays bound afterwards, so a later pool.close()/pool.join() is a
# harmless no-op.
with Pool(os.cpu_count()) as pool:
    res_li = pool.map(transform_annotations, zip(raw_paths, bot_paths))  # list: one dict or None per pair

In [37]:
# Merge the per-echogram results into one {filename: [annotations_idx, labels]} dict.
# transform_annotations returns None for files it skipped; ignore those entries.
annotations_dict = {}
for result in res_li:
    if result is None:  # identity check is the correct test for None
        continue
    annotations_dict.update(result)

In [38]:
# Persist the merged annotations dict to disk as a pickle file.
pkl_dir = "pkl/"
# Create the output directory up front so a missing folder cannot make the
# write fail after the long-running transform step.
os.makedirs(pkl_dir, exist_ok=True)
# NOTE: HIGHEST_PROTOCOL depends on the running Python version (5 on 3.8+), so
# older interpreters may be unable to load the resulting file.
with open(pkl_dir + 'annotations_dict_new.pickle', 'wb') as handle:
    pickle.dump(annotations_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [39]:
# Shut the worker pool down: stop accepting new tasks, then wait for the
# worker processes to exit (join() must be preceded by close() or terminate()).
pool.close()
pool.join()

In [1]:
# NOTE: use annotations_dict_new_p4.pickle, with protocol=4