# Example 7 - Export outputs for Echoview

In this example, we'll run the whole framework on 2018 data, and then exports the ROIs of Atlantic herring schools as Echoview format. 

In [1]:
import os
import numpy as np
import pandas as pd
import glob
import pickle
import random

from src.read_echogram import EchogramReader
from src.detect_ROI import ROIDetector
from src.ROI_features import FeatureExtractor
from src.transform_annotations import AnnotationTransformer
from src.match_annotations import OverlapAnnotation
from src.crop_ROI import ROICropper
from src.transform_ROI import ROITransformer

from sklearn.preprocessing import StandardScaler, MinMaxScaler

from multiprocessing import Pool

%matplotlib inline

## Step 1. Get ROI detector and classifier ready. 

In this step, we'll first get the ROI classifier ready, here, let's use the PU-learning classifier instead. 

In [2]:
# parameters for ROI detector
threshold = -66
kernel_size = 1

In [3]:
pkl_dir = "pkl/"

In [4]:
# load classifier (update, change pos/neg ratio, from 1 to 4, recall only 0.78)
with open(pkl_dir + 'model_PU.pkl', 'rb') as handle:
    pu_estimator = pickle.load(handle)



Actually, it is observed that PU learning (with lower recall) may tend to miss AH schools, but less likely to make errors. Therefore, try co-training also!

In [5]:
# features
acoustic_features = ['Sv_18kHz_min', 'Sv_18kHz_p5', 'Sv_18kHz_p25', 'Sv_18kHz_p50', 'Sv_18kHz_p75', 'Sv_18kHz_p95', 'Sv_18kHz_max', 'Sv_18kHz_std', 'Sv_38kHz_min', 'Sv_38kHz_p5', 'Sv_38kHz_p25', 'Sv_38kHz_p50', 'Sv_38kHz_p75', 'Sv_38kHz_p95', 'Sv_38kHz_max', 'Sv_38kHz_std', 'Sv_120kHz_min', 'Sv_120kHz_p5', 'Sv_120kHz_p25', 'Sv_120kHz_p50', 'Sv_120kHz_p75', 'Sv_120kHz_p95', 'Sv_120kHz_max', 'Sv_120kHz_std', 'Sv_200kHz_min', 'Sv_200kHz_p5', 'Sv_200kHz_p25', 'Sv_200kHz_p50', 'Sv_200kHz_p75', 'Sv_200kHz_p95', 'Sv_200kHz_max', 'Sv_200kHz_std', 'Sv_ref_18kHz', 'Sv_ref_120kHz', 'Sv_ref_200kHz']
geometric_features = ['length', 'thickness', 'area', 'perimeter', 'rectangularity', 'compact', 'circularity', 'elongation']
geographic_features_vertical = ['total_water_column', 'depth', 'relative_altitude']
geographic_features_horizontal = ['latitude', 'longitude']
sel_features = acoustic_features + geometric_features + geographic_features_vertical + geographic_features_horizontal

In [6]:
# minmax tranformer
df_roi_features = pd.read_pickle(pkl_dir + 'df_roi_features_new.pkl') # kernel_size = 1
min_max_scaler = MinMaxScaler()
df_roi_features[sel_features] = min_max_scaler.fit_transform(df_roi_features[sel_features])

## Step 2. Load 2018 dataset dir

In this step, we'll load dataset of 2018. 

In [7]:
raw_dir = "../data/HB1806_EK60/rawfiles/"
bot_dir = "../data/HB1806_EK60/botfiles/"
freq_li = [18, 38, 120, 200]

In [8]:
raw_paths = sorted(glob.glob(raw_dir + '*.raw'))
bot_paths = sorted(glob.glob(bot_dir + '*.bot'))

In [9]:
raw_paths

['../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T135645.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T144441.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T151359.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T153904.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T160402.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T162903.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T165355.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T171842.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T174337.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T180857.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T190545.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T193351.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T200007.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T202633.raw',
 '../data/HB1806_EK60/rawfiles/HB1806_EK60D20180904-T205226.raw',
 '../data/

## Step 3. Identify AH from 2018 data

In this step, we'll define the function to get all AH schools from 2018 data, saved them as dict. In total, there are 3113 echograms in 2018. Est. 4 hours. 

In [10]:
fig_dir = None

In [11]:
output_dir = "pkl/export_2018_PU_ratio_3/"

In [12]:
def identify_AH(a):
    i, j = a
    # load echogram
    echogram = EchogramReader(i, j, freq_li)
    filename, Sv_npy, surface_idx, bottom_idx, time, depth, positions = echogram()   
    # detect ROIs
    roi = ROIDetector(filename, Sv_npy, surface_idx, bottom_idx, fig_dir, threshold, kernel_size)
    img_shape, contours = roi()
    # get features
    features = FeatureExtractor(filename, contours, Sv_npy, bottom_idx, time, depth, positions)                    
    contours_sel, contours_features = features() 
    # predict if AH or not
    contours_sel_sel = []
    for idx, contour in enumerate(contours_features):
        sample = np.array([contour[feat] for feat in sel_features])
        label = pu_estimator.predict(min_max_scaler.transform(sample.reshape(1, -1)))
        # sample_norm = min_max_scaler.transform(sample.reshape(1, -1))
        # label = co_estimator.predict(sample_norm[:, :-5], sample_norm[:, -5:])
        if label == 1:
            contours_sel_sel.append(contours_sel[idx]) # select contours
    # transform each ROI, one echogram + multiple ROIs
    res = []
    for contour in contours_sel_sel:
        roi_transformer = ROITransformer(contour, depth, time)
        point_count, bbox_points, mask_points = roi_transformer() # list, or nested list
        res.append([point_count, bbox_points, mask_points])
    # save
    if res:
        with open(output_dir + f'{filename}.pkl', 'wb') as f:
            pickle.dump(res, f)

Use multiprocessing to save time. 

In [13]:
pool = Pool(os.cpu_count())
pool.map(identify_AH, zip(raw_paths, bot_paths)) # no return

HB1806_EK60D20180907-T090035HB1806_EK60D20180904-T135645

HB1806_EK60D20180910-T203235HB1806_EK60D20180921-T060948HB1806_EK60D20180917-T134619HB1806_EK60D20180926-T214452


HB1806_EK60D20180928-T082811HB1806_EK60D20180914-T055540


HB1806_EK60D20180910-T210125
HB1806_EK60D20180904-T144441
HB1806_EK60D20180910-T212936
HB1806_EK60D20180904-T151359
HB1806_EK60D20180910-T220550
HB1806_EK60D20180904-T153904
HB1806_EK60D20180904-T160402
HB1806_EK60D20180910-T230036
HB1806_EK60D20180907-T094653
HB1806_EK60D20180904-T162903
HB1806_EK60D20180904-T165355
HB1806_EK60D20180904-T171842
HB1806_EK60D20180917-T144312
HB1806_EK60D20180910-T235729
HB1806_EK60D20180904-T174337
HB1806_EK60D20180914-T065238
HB1806_EK60D20180928-T084920
HB1806_EK60D20180911-T003054
HB1806_EK60D20180904-T180857
HB1806_EK60D20180911-T005902
HB1806_EK60D20180921-T070640
HB1806_EK60D20180907-T104342
HB1806_EK60D20180911-T012741
HB1806_EK60D20180907-T113336
HB1806_EK60D20180911-T015802
HB1806_EK60D20180926-T220612
HB1806_EK60D20

HB1806_EK60D20180915-T123359
HB1806_EK60D20180926-T021431
HB1806_EK60D20180915-T130106
HB1806_EK60D20180905-T230037
HB1806_EK60D20180908-T143157
HB1806_EK60D20180928-T194525
HB1806_EK60D20180915-T132838
HB1806_EK60D20180912-T105543
HB1806_EK60D20180926-T023601
HB1806_EK60D20180912-T114546
HB1806_EK60D20180912-T121904
HB1806_EK60D20180905-T235727
HB1806_EK60D20180915-T135554
HB1806_EK60D20180908-T152848
HB1806_EK60D20180926-T025725
HB1806_EK60D20180918-T035940
HB1806_EK60D20180928-T200634
HB1806_EK60D20180912-T131600
HB1806_EK60D20180927-T022140HB1806_EK60D20180915-T142330

HB1806_EK60D20180906-T002947
HB1806_EK60D20180928-T202743
HB1806_EK60D20180912-T141255
HB1806_EK60D20180926-T031837
HB1806_EK60D20180928-T204851
HB1806_EK60D20180912-T150951
HB1806_EK60D20180906-T005647
HB1806_EK60D20180928-T210959
HB1806_EK60D20180918-T045632
HB1806_EK60D20180928-T213106
HB1806_EK60D20180906-T013032
HB1806_EK60D20180912-T160551
HB1806_EK60D20180926-T033949
HB1806_EK60D20180928-T215214
HB1806_EK60D20

HB1806_EK60D20180930-T031327
HB1806_EK60D20180916-T135312
HB1806_EK60D20181004-T030737
HB1806_EK60D20180916-T142153
HB1806_EK60D20180910-T032407
HB1806_EK60D20181002-T163445
HB1806_EK60D20180930-T033436
HB1806_EK60D20181004-T032846
HB1806_EK60D20180926-T181045
HB1806_EK60D20181001-T101350
HB1806_EK60D20180927-T101033
HB1806_EK60D20180916-T151845
HB1806_EK60D20181002-T165554
HB1806_EK60D20180930-T035544
HB1806_EK60D20181004-T034955
HB1806_EK60D20180927-T103145
HB1806_EK60D20180916-T160239
HB1806_EK60D20180926-T183155
HB1806_EK60D20180927-T105258
HB1806_EK60D20181004-T041102
HB1806_EK60D20181002-T171703
HB1806_EK60D20180916-T163052
HB1806_EK60D20180910-T042058
HB1806_EK60D20180926-T185306
HB1806_EK60D20181002-T173814
HB1806_EK60D20181004-T043209
HB1806_EK60D20181001-T103457
HB1806_EK60D20180930-T041653
HB1806_EK60D20180927-T111410
HB1806_EK60D20180916-T172612
HB1806_EK60D20180930-T043802
HB1806_EK60D20181002-T175923
HB1806_EK60D20181004-T045318
HB1806_EK60D20181001-T105605
HB1806_EK60D20

HB1806_EK60D20181012-T161352
HB1806_EK60D20181010-T143743
HB1806_EK60D20180927-T184119
HB1806_EK60D20181001-T192330
HB1806_EK60D20181012-T023036
HB1806_EK60D20181003-T074450
HB1806_EK60D20180930-T093358
HB1806_EK60D20181009-T204214
HB1806_EK60D20181010-T145332
HB1806_EK60D20181012-T024627
HB1806_EK60D20181012-T162942
HB1806_EK60D20181012-T030217
HB1806_EK60D20181001-T194439
HB1806_EK60D20180930-T095507
HB1806_EK60D20181003-T080559
HB1806_EK60D20181012-T164527
HB1806_EK60D20181009-T210321
HB1806_EK60D20181001-T200548
HB1806_EK60D20180927-T190236
HB1806_EK60D20181010-T150922
HB1806_EK60D20181012-T170118
HB1806_EK60D20181012-T031807
HB1806_EK60D20181001-T202750
HB1806_EK60D20180930-T101615
HB1806_EK60D20181003-T082708
HB1806_EK60D20181012-T171710
HB1806_EK60D20181009-T212429
HB1806_EK60D20181012-T033352
HB1806_EK60D20181003-T084816
HB1806_EK60D20181001-T204858
HB1806_EK60D20180930-T103723
HB1806_EK60D20181010-T152515
HB1806_EK60D20181009-T214537
HB1806_EK60D20180927-T192346
HB1806_EK60D20

HB1806_EK60D20181011-T033349
HB1806_EK60D20181012-T112855
HB1806_EK60D20180928-T024901
HB1806_EK60D20180930-T182229
HB1806_EK60D20181010-T013811
HB1806_EK60D20181012-T114446
HB1806_EK60D20181011-T034940HB1806_EK60D20181014-T223833

HB1806_EK60D20181002-T140647
HB1806_EK60D20180930-T184337
HB1806_EK60D20181011-T040527
HB1806_EK60D20180930-T190446
HB1806_EK60D20181011-T042116
HB1806_EK60D20180930-T192554
HB1806_EK60D20181013-T194025
HB1806_EK60D20180930-T194703
HB1806_EK60D20181011-T043707
HB1806_EK60D20181002-T142756
HB1806_EK60D20180930-T200812
HB1806_EK60D20181010-T015950
HB1806_EK60D20181012-T120035
HB1806_EK60D20180930-T202920
HB1806_EK60D20181011-T045300
HB1806_EK60D20180928-T031016
HB1806_EK60D20180930-T205028
HB1806_EK60D20181014-T225422
HB1806_EK60D20181002-T144905
HB1806_EK60D20181011-T050847
HB1806_EK60D20181002-T151012
HB1806_EK60D20180930-T211136
HB1806_EK60D20181010-T022058
HB1806_EK60D20181012-T121623
HB1806_EK60D20180928-T033133
HB1806_EK60D20181011-T052436
HB1806_EK60D20

  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50
  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50


HB1806_EK60D20181013-T195451
HB1806_EK60D20180930-T215353
HB1806_EK60D20181002-T153120
HB1806_EK60D20180930-T221502
HB1806_EK60D20181011-T054028
HB1806_EK60D20181014-T231013
HB1806_EK60D20180928-T035244
HB1806_EK60D20180930-T223611
HB1806_EK60D20180930-T225718
HB1806_EK60D20181002-T155227
HB1806_EK60D20181011-T055616
HB1806_EK60D20180930-T231826
HB1806_EK60D20181012-T123214
HB1806_EK60D20181014-T232601
HB1806_EK60D20181010-T024206
HB1806_EK60D20180930-T233934
HB1806_EK60D20181016-T095214
HB1806_EK60D20181011-T061205
HB1806_EK60D20180928-T041355
HB1806_EK60D20181013-T200916
HB1806_EK60D20181001-T000042
HB1806_EK60D20181014-T234152
HB1806_EK60D20181001-T002149
HB1806_EK60D20181010-T030314
HB1806_EK60D20181012-T124804
HB1806_EK60D20181011-T062756
HB1806_EK60D20181010-T032421
HB1806_EK60D20181017-T133302
HB1806_EK60D20180928-T043506
HB1806_EK60D20181012-T130354
HB1806_EK60D20181013-T202339
HB1806_EK60D20181014-T235741
HB1806_EK60D20181012-T131940
HB1806_EK60D20181010-T034529
HB1806_EK60D20

  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50


HB1806_EK60D20181010-T083304
HB1806_EK60D20181031-T085524
HB1806_EK60D20181030-T173418
HB1806_EK60D20181011-T104106
HB1806_EK60D20181030-T175010
HB1806_EK60D20181015-T030733
HB1806_EK60D20181030-T180604
HB1806_EK60D20181031-T091148
HB1806_EK60D20181030-T182156
HB1806_EK60D20181030-T183740
HB1806_EK60D20181030-T185332
HB1806_EK60D20181015-T032324
HB1806_EK60D20181030-T190923
HB1806_EK60D20181017-T143832
HB1806_EK60D20181010-T084856
HB1806_EK60D20181030-T192509
HB1806_EK60D20181030-T194101
HB1806_EK60D20181011-T105658
HB1806_EK60D20181030-T195652
HB1806_EK60D20181013-T231001
HB1806_EK60D20181015-T033915
HB1806_EK60D20181030-T201242
HB1806_EK60D20181101-T101932
HB1806_EK60D20181030-T202832
HB1806_EK60D20181015-T035506
HB1806_EK60D20181031-T092812
HB1806_EK60D20181030-T204418
HB1806_EK60D20181030-T210010
HB1806_EK60D20181011-T111248
HB1806_EK60D20181030-T211604
HB1806_EK60D20181015-T041056
HB1806_EK60D20181030-T213157
HB1806_EK60D20181010-T090447
HB1806_EK60D20181030-T214749
HB1806_EK60D20

  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50


HB1806_EK60D20181102-T140222
HB1806_EK60D20181102-T141810
HB1806_EK60D20181102-T143402
HB1806_EK60D20181010-T103950
HB1806_EK60D20181102-T144956
HB1806_EK60D20181102-T150546
HB1806_EK60D20181015-T061735
HB1806_EK60D20181014-T004511
HB1806_EK60D20181102-T152140


  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50
  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50


HB1806_EK60D20181102-T153733
HB1806_EK60D20181102-T155328
HB1806_EK60D20181017-T151113
HB1806_EK60D20181102-T160920
HB1806_EK60D20181102-T162513
HB1806_EK60D20181015-T065537
HB1806_EK60D20181031-T103347
HB1806_EK60D20181102-T164105
HB1806_EK60D20181010-T105543


  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50


HB1806_EK60D20181102-T165659
HB1806_EK60D20181102-T171252
HB1806_EK60D20181014-T010104
HB1806_EK60D20181030-T235453
HB1806_EK60D20181102-T172845
HB1806_EK60D20181102-T174439
HB1806_EK60D20181102-T180031
HB1806_EK60D20181103-T133302
HB1806_EK60D20181102-T181626
HB1806_EK60D20181103-T134900
HB1806_EK60D20181010-T111137
HB1806_EK60D20181102-T183220
HB1806_EK60D20181014-T011656
HB1806_EK60D20181103-T140450
HB1806_EK60D20181102-T184803
HB1806_EK60D20181103-T142042
HB1806_EK60D20181102-T190356
HB1806_EK60D20181103-T143635
HB1806_EK60D20181102-T191948
HB1806_EK60D20181101-T110841
HB1806_EK60D20181103-T145227
HB1806_EK60D20181031-T105011
HB1806_EK60D20181102-T193541
HB1806_EK60D20181103-T150830
HB1806_EK60D20181102-T195132
HB1806_EK60D20181014-T013248
HB1806_EK60D20181103-T152423
HB1806_EK60D20181102-T200723
HB1806_EK60D20181010-T112728
HB1806_EK60D20181103-T154020
HB1806_EK60D20181102-T202315
HB1806_EK60D20181103-T155616
HB1806_EK60D20181102-T203908
HB1806_EK60D20181103-T161207
HB1806_EK60D20

HB1806_EK60D20181014-T182531
HB1806_EK60D20181014-T184118
HB1806_EK60D20181017-T172159
HB1806_EK60D20181014-T185706
HB1806_EK60D20181104-T132902
HB1806_EK60D20181106-T213426
HB1806_EK60D20181106-T215021
HB1806_EK60D20181105-T180323
HB1806_EK60D20181106-T220616
HB1806_EK60D20181106-T222214
HB1806_EK60D20181101-T135239
HB1806_EK60D20181017-T173756
HB1806_EK60D20181106-T223813
HB1806_EK60D20181031-T014854
HB1806_EK60D20181106-T225411
HB1806_EK60D20181104-T171619
HB1806_EK60D20181106-T231008
HB1806_EK60D20181104-T134516
HB1806_EK60D20181031-T135043
HB1806_EK60D20181105-T181918
HB1806_EK60D20181106-T232606
HB1806_EK60D20181017-T175347
HB1806_EK60D20181106-T234203
HB1806_EK60D20181106-T235759
HB1806_EK60D20181017-T180939
HB1806_EK60D20181107-T001358
HB1806_EK60D20181107-T002955
HB1806_EK60D20181105-T183511
HB1806_EK60D20181107-T004552
HB1806_EK60D20181104-T140130
HB1806_EK60D20181107-T010149
HB1806_EK60D20181031-T140703
HB1806_EK60D20181107-T011748
HB1806_EK60D20181101-T140903
HB1806_EK60D20

  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50
  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50


HB1806_EK60D20181107-T051731
HB1806_EK60D20181017-T200048
HB1806_EK60D20181107-T053323
HB1806_EK60D20181104-T180501
HB1806_EK60D20181107-T054921
HB1806_EK60D20181107-T060521
HB1806_EK60D20181017-T201638
HB1806_EK60D20181107-T062114
HB1806_EK60D20181104-T145002
HB1806_EK60D20181031-T023838
HB1806_EK60D20181107-T063710
HB1806_EK60D20181031-T151240
HB1806_EK60D20181101-T144152
HB1806_EK60D20181107-T065310
HB1806_EK60D20181017-T203223
HB1806_EK60D20181107-T070908
HB1806_EK60D20181017-T204816
HB1806_EK60D20181107-T072507
HB1806_EK60D20181017-T210406
HB1806_EK60D20181105-T193850
HB1806_EK60D20181017-T211953
HB1806_EK60D20181031-T152904
HB1806_EK60D20181017-T213545
HB1806_EK60D20181104-T182114
HB1806_EK60D20181017-T215136
HB1806_EK60D20181017-T220727
HB1806_EK60D20181107-T074105
HB1806_EK60D20181101-T145816
HB1806_EK60D20181031-T025501
HB1806_EK60D20181017-T222319
HB1806_EK60D20181017-T223911
HB1806_EK60D20181031-T154528
HB1806_EK60D20181017-T225504
HB1806_EK60D20181105-T195445
HB1806_EK60D20

  Sv_features.append(Sv_features[i][3] / float(Sv_features[1][3])) # perc 50


HB1806_EK60D20181031-T220224
HB1806_EK60D20181110-T132416
HB1806_EK60D20181108-T064621
HB1806_EK60D20181110-T134117
HB1806_EK60D20181101-T180953
HB1806_EK60D20181110-T135745
HB1806_EK60D20181107-T102042
HB1806_EK60D20181109-T105412
HB1806_EK60D20181110-T141401
HB1806_EK60D20181107-T103641
HB1806_EK60D20181108-T070213
HB1806_EK60D20181110-T143019
HB1806_EK60D20181107-T105245
HB1806_EK60D20181110-T144614
HB1806_EK60D20181110-T150221
HB1806_EK60D20181107-T110848
HB1806_EK60D20181108-T071806
HB1806_EK60D20181031-T221849
HB1806_EK60D20181110-T152037
HB1806_EK60D20181107-T112443
HB1806_EK60D20181109-T111009
HB1806_EK60D20181110-T153631
HB1806_EK60D20181107-T114037
HB1806_EK60D20181105-T233646
HB1806_EK60D20181110-T155225
HB1806_EK60D20181110-T160818
HB1806_EK60D20181108-T073359
HB1806_EK60D20181110-T162411
HB1806_EK60D20181104-T222424
HB1806_EK60D20181110-T164005
HB1806_EK60D20181101-T182548
HB1806_EK60D20181110-T165557
HB1806_EK60D20181110-T171150
HB1806_EK60D20181110-T172744
HB1806_EK60D20

HB1806_EK60D20181109-T055212
HB1806_EK60D20181101-T212047
HB1806_EK60D20181107-T200946
HB1806_EK60D20181109-T060805
HB1806_EK60D20181101-T024037
HB1806_EK60D20181109-T062357
HB1806_EK60D20181105-T001540
HB1806_EK60D20181111-T190732
HB1806_EK60D20181107-T202536
HB1806_EK60D20181109-T063949
HB1806_EK60D20181109-T065542
HB1806_EK60D20181109-T071130
HB1806_EK60D20181109-T174818
HB1806_EK60D20181111-T192326
HB1806_EK60D20181109-T072719
HB1806_EK60D20181107-T204132
HB1806_EK60D20181109-T074311
HB1806_EK60D20181109-T075905
HB1806_EK60D20181111-T193920
HB1806_EK60D20181101-T025658
HB1806_EK60D20181109-T081458
HB1806_EK60D20181106-T031910
HB1806_EK60D20181107-T205728
HB1806_EK60D20181101-T213643
HB1806_EK60D20181111-T195514
HB1806_EK60D20181107-T211324
HB1806_EK60D20181109-T083052
HB1806_EK60D20181109-T180405
HB1806_EK60D20181107-T212912
HB1806_EK60D20181105-T003136
HB1806_EK60D20181101-T031329
HB1806_EK60D20181107-T214507
HB1806_EK60D20181111-T201109
HB1806_EK60D20181107-T220102
HB1806_EK60D20

HB1806_EK60D20181102-T034228
HB1806_EK60D20181106-T135235
HB1806_EK60D20181112-T151817
HB1806_EK60D20181105-T084604
HB1806_EK60D20181110-T005742
HB1806_EK60D20181105-T090200
HB1806_EK60D20181102-T035854
HB1806_EK60D20181105-T091755
HB1806_EK60D20181106-T140830
HB1806_EK60D20181105-T093349
HB1806_EK60D20181110-T011335
HB1806_EK60D20181105-T094946
HB1806_EK60D20181105-T100541
HB1806_EK60D20181105-T102137
HB1806_EK60D20181113-T000327
HB1806_EK60D20181106-T142426
HB1806_EK60D20181102-T041446
HB1806_EK60D20181105-T103735
HB1806_EK60D20181105-T105334
HB1806_EK60D20181105-T110931
HB1806_EK60D20181110-T012925
HB1806_EK60D20181102-T043040
HB1806_EK60D20181106-T144022
HB1806_EK60D20181105-T112522
HB1806_EK60D20181110-T014526
HB1806_EK60D20181105-T114119
HB1806_EK60D20181102-T044632
HB1806_EK60D20181106-T145619
HB1806_EK60D20181113-T001917
HB1806_EK60D20181110-T020121
HB1806_EK60D20181105-T115716
HB1806_EK60D20181102-T050225
HB1806_EK60D20181106-T151214
HB1806_EK60D20181102-T051820
HB1806_EK60D20

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

In [14]:
pool.close()
pool.join()

In annotations, AH school from 10/01 - 11/13, in total, 341 unique echograms. 

## Step 4. Check with 2018 annotations

In this step, we'll check how many generated files overlap with annotations, what %, and how we can further improve it?

In [20]:
output_dir = "pkl/export_2018_PU_ratio_4/"

In [21]:
annotations_dir = "../pkl/"
annotations = pd.read_pickle(annotations_dir + "annotation_fish_school_features_2018_ah_only.pickle")
print(annotations.columns)
print(annotations['label'].unique())

Index(['echogram_id', 'width', 'height', 'Sv_18kHz', 'Sv_38kHz', 'Sv_120kHz',
       'Sv_200kHz', 'time', 'depth', 'total_water_column', 'latitude',
       'longitude', 'speed', 'x_min', 'x_max', 'y_min', 'y_max', 'center_x',
       'center_y', 'label', 'sel_label'],
      dtype='object')
['AH_School']


In [22]:
# get unique files (doubled...)
annotations_li = annotations['echogram_id'].unique()
print(len(annotations_li))

341


In [23]:
# check overlap
predictions_li = [i.replace('.pkl', '') for i in os.listdir(output_dir)]
print(len(predictions_li))

571


In [24]:
count = 0
for item in annotations_li:
    if item in predictions_li:
        count += 1
print(count, len(annotations_li)) # only 78%

254 341


Note that some AH schools in annotations are not actually AH schools, they may overlap with other regions, and not filtered out. Therefore, in reality, this number should be lower than 538. 

In [25]:
# compute number of AH schools per echogram
annotations_group = annotations.groupby('echogram_id')['label'].count().reset_index()
annotations_group1 = annotations_group[annotations_group['echogram_id'].isin(predictions_li)]
annotations_group2 = annotations_group[~annotations_group['echogram_id'].isin(predictions_li)]
print(annotations_group1['label'].max(), annotations_group2['label'].max())

16 11


It looks like those that the model failed to detect may relate to less annotations, e.g., when there are only a few AH schools on the echogram.  