# Example 5 - Analyze annotations

In this notebook, we'll examine typical features of different annotations. 

In [52]:
import os
import numpy as np
import pandas as pd
import glob
import pickle
import seaborn as sns
import matplotlib.pyplot as plt

# Global plot styling: seaborn defaults with a slightly larger font scale.
sns.set(font_scale=1.2)
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later removed the old names. Prefer the new name when it is available and
# fall back to the legacy one on older matplotlib versions.
white_style = (
    'seaborn-v0_8-white'
    if 'seaborn-v0_8-white' in plt.style.available
    else 'seaborn-white'
)
plt.style.use(white_style)
%matplotlib inline

## Step 1. Load annotations

In this step, we'll load all annotations. 

In [2]:
# Directory holding the annotation CSV files (also used by later cells).
annotations_dir = "../csv/"
# Read the mask-annotation table and discard every row that has a missing
# value in any column.
annotations = (
    pd.read_csv(annotations_dir + "annotation_df_masks.csv")
    .dropna(how='any')
)

In [3]:
# List the columns available in the loaded annotation table.
print(annotations.columns)

Index(['Unnamed: 0', 'label', 'start_Date', 'start_Time', 'end_Date',
       'end_Time', 'start_Depth', 'end_Depth', 'mask', 'file_dir'],
      dtype='object')


In [4]:
# Mid-depth of every annotation: the average of its start and end depth.
annotations['depth'] = (
    annotations['start_Depth']
    .add(annotations['end_Depth'])
    .div(2.0)
)
# Median of that mid-depth within each label.
annotations_groupby_depth = annotations.groupby('label')['depth'].median()
print(annotations_groupby_depth)

label
AH_School               117.638394
Unclassified regions     13.526012
atlantic_herring         54.579208
fish_school              29.014150
krill_schools            70.000000
possible_herring         17.821782
Name: depth, dtype: float64


In [11]:
# Read the per-annotation size table from the same CSV directory and report
# the median of its 'size' column for each label.
annotations_size = pd.read_csv(annotations_dir + "annotation_size_df.csv")
median_size_by_label = annotations_size.groupby('label')['size'].median()
print(median_size_by_label)

label
AH_School                  504.0
Unclassified regions      5440.0
atlantic_herring        114048.0
fish_school                315.0
krill_schools              249.0
possible_herring          1292.0
Name: size, dtype: float64


The above annotation data don't contain detailed size information. Try loading the .pkl file. 

In [5]:
# Directory holding the pickled feature tables.
pkl_dir = "../pkl/"
# NOTE(review): unpickling can execute arbitrary code, so this file should
# only ever come from a trusted source.
annotations_pkl = pd.read_pickle(pkl_dir + "annotation_fish_school_features_2019.pickle")

In [8]:
# Show which labels occur in the pickled table, then the columns it provides.
print(annotations_pkl['label'].unique())
print(annotations_pkl.columns)

['fish_school' 'Unclassified regions' 'AH_School']
Index(['echogram_id', 'width', 'height', 'Sv_18kHz', 'Sv_38kHz', 'Sv_120kHz',
       'Sv_200kHz', 'time', 'depth', 'total_water_column', 'latitude',
       'longitude', 'speed', 'x_min', 'x_max', 'y_min', 'y_max', 'center_x',
       'center_y', 'label'],
      dtype='object')


In [9]:
# Size of each annotation as the product of its width and height, followed
# by the median size per label.
annotations_pkl['size'] = annotations_pkl['width'].mul(annotations_pkl['height'])
median_pkl_size_by_label = annotations_pkl.groupby('label')['size'].median()
print(median_pkl_size_by_label)

label
AH_School               317.711112
Unclassified regions    664.974566
fish_school             171.910885
Name: size, dtype: float64


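Krill schools are not present in the .pkl file, so we estimate their median size by rescaling the CSV value. The `fish_school` median is 315 in the size CSV and about 171.9 in the .pkl file, so the krill median of 249 corresponds to roughly $249 \times 171.9 / 315 \approx 136\ \mathrm{m}^2$. 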

Actually, vessel speed varied a lot. 

In [12]:
# Print the smallest and largest value of the 'speed' column.
print(annotations_pkl['speed'].min(), annotations_pkl['speed'].max())

0.021291624621594354 13.3


## Step 2. Analyze Atlantic herring schools

In this step, we'll analyze the characteristics of Atlantic herring schools. 

In [29]:
# Keep only the rows labelled 'AH_School'. Take an explicit copy: later cells
# add columns to this subset, and doing that on a boolean-indexed slice of
# annotations_pkl raises SettingWithCopyWarning.
annotation_df_AH = annotations_pkl[annotations_pkl['label'] == 'AH_School'].copy()

In [37]:
# Edges of the depth bins, in the units of the 'depth' column. The last edge
# doubles as the cut-off below which annotations are discarded.
depth_bin_edges = [0, 50, 100, 150, 200, 250]

# Build the binned frame in one chain rather than assigning columns onto a
# filtered slice; this avoids SettingWithCopyWarning and gives the same result
# when the cell is re-run.
annotation_df_AH = (
    annotation_df_AH
    .assign(aspect_ratio=lambda df: df['width'] / df['height'])
    # remove very bottom, with seabed echoes!!!
    .loc[lambda df: df['depth'] <= depth_bin_edges[-1]]
    # sorting by depth makes the depth ranges first appear in ascending order
    .sort_values('depth')
    .assign(depth_range=lambda df: pd.cut(df['depth'], depth_bin_edges))
)
# One shade of blue per depth bin.
colors = sns.color_palette(palette="Blues", n_colors=len(depth_bin_edges) - 1)

In [31]:
# Output directory for the saved figures. Create it up front so plt.savefig
# does not fail when the directory does not exist yet.
fig_dir = "figures/"
os.makedirs(fig_dir, exist_ok=True)

Plot the median Sv value at each frequency, with one line per depth range. 

In [62]:
# Sv columns in plotting order, one per frequency channel.
sv_columns = ['Sv_18kHz', 'Sv_38kHz', 'Sv_120kHz', 'Sv_200kHz']

# One line per depth range: the median of each Sv column within that range,
# coloured by the position of the range in the palette.
for color_idx, depth_bin in enumerate(annotation_df_AH['depth_range'].unique()):
    in_bin = annotation_df_AH['depth_range'] == depth_bin
    median_sv = annotation_df_AH[in_bin][sv_columns].median()
    sns.lineplot(
        x=range(4),
        y=median_sv,
        marker="o",
        linewidth=3,
        label=depth_bin,
        color=colors[color_idx],
    )

# Axis cosmetics (same call order as before, since tick styling depends on it).
plt.xticks(range(4), ['18', '38', '120', '200'], fontsize=17)
plt.xlabel('Frequency (kHz)', fontsize=22)
plt.ylabel('Median Sv value', fontsize=22)
plt.yticks(fontsize=17)
plt.ylim(-68, -53)
plt.legend(prop={'size': 16})
plt.gca().grid(True, linestyle='--', linewidth=0.5)
plt.tight_layout(pad=0.1)

# Write the figure to disk and close it.
sv_figure_path = fig_dir + "annotation_AH_Sv_by_depth_2019.png"
plt.savefig(sv_figure_path, dpi=300)
plt.close()

Plot boxplots of the aspect ratio (width / height) for each depth range. 

In [63]:
# Median marker drawn as a dashed black line.
median_style = dict(linewidth=2, alpha=1, linestyle='--', color='black')

# Aspect-ratio distribution per depth range, with outlier points hidden.
sns.boxplot(
    x="depth_range",
    y="aspect_ratio",
    data=annotation_df_AH,
    palette='Blues',
    showfliers=False,
    medianprops=median_style,
)

# Axis cosmetics (same call order as before).
plt.xticks(fontsize=16)
plt.xlabel('Depth range (m)', fontsize=22)
plt.ylabel('Aspect ratio', fontsize=22)
plt.yticks(fontsize=17)
plt.gca().grid(True, linestyle='--', linewidth=0.5)
plt.tight_layout(pad=0.1)

# Write the figure to disk and close it.
shape_figure_path = fig_dir + "annotation_AH_shape_by_depth_2019.png"
plt.savefig(shape_figure_path, dpi=300)
plt.close()