# Merging annotations from all datasets


## Purpose of this notebook

This notebook creates a master annotation file that gathers the annotations from all datasets. This master annotation file is then used to create the spectrogram database for Ketos.

In [1]:
# load  libraries
import os
from ecosound.core.annotation import Annotation
from ecosound.core.tools import list_files
import pandas as pd
from datetime import datetime

## Find all annotation files available

In [2]:
# Folder under which the annotation files of the individual datasets are stored
root_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets'

# Recursively collect the annotation files (suffix 'annotations.nc') located
# under "root_dir", then display the resulting list as the cell output.
dataset_files = list_files(
    root_dir,
    suffix='annotations.nc',
    recursive=True,
)
dataset_files

['D:\\NOAA\\2022_Minke_whale_detector\\manual_annotations\\discrete_datasets\\UK-MSS-MorayFirth-201607_181-Helmsdale\\noise\\Annotations_dataset_UK-MSS-MorayFirth-201607_181-Helmsdale annotations.nc',
 'D:\\NOAA\\2022_Minke_whale_detector\\manual_annotations\\discrete_datasets\\UK-MSS-MorayFirth-201806_274-Helmsdale\\Annotations_dataset_UK-MSS-MorayFirth-201806_274-Helmsdale annotations.nc',
 'D:\\NOAA\\2022_Minke_whale_detector\\manual_annotations\\discrete_datasets\\UK-MSS-MorayFirth-201806_274-Helmsdale\\noise\\Annotations_dataset_UK-MSS-MorayFirth-201806_274-Helmsdale annotations.nc',
 'D:\\NOAA\\2022_Minke_whale_detector\\manual_annotations\\discrete_datasets\\UK-MSS-MorayFirth-201904_360-Helmsdale\\Annotations_dataset_UK-MSS-MorayFirth-201904_360-Helmsdale annotations.nc',
 'D:\\NOAA\\2022_Minke_whale_detector\\manual_annotations\\discrete_datasets\\UK-MSS-MorayFirth-201904_360-Helmsdale\\noise\\Annotations_dataset_UK-MSS-MorayFirth-201904_360-Helmsdale annotations.nc',
 'D:\\NOA

In [3]:
# Number of annotation files found. A notebook cell only displays the value of
# its last expression, so the count has to be printed explicitly to be visible.
print(len(dataset_files))
# First entry of the list, shown as the cell output
dataset_files[0]

'D:\\NOAA\\2022_Minke_whale_detector\\manual_annotations\\discrete_datasets\\UK-MSS-MorayFirth-201607_181-Helmsdale\\noise\\Annotations_dataset_UK-MSS-MorayFirth-201607_181-Helmsdale annotations.nc'

## Concatenate all annotations into a single dataset:

In [4]:
# Load every annotation file and merge them all into a single Annotation object
annot = Annotation()
for file in dataset_files:
    print(file)
    dataset_annot = Annotation()
    dataset_annot.from_netcdf(file, verbose=True)
    annot = annot + dataset_annot
annot.check_integrity()
# Blank out sub-class labels that are exactly the string 'nan'. The match is on
# the whole value (no regex): with regex=True, 'nan' would be treated as a
# pattern found anywhere in the string, so a label that merely contains those
# three letters would have them stripped out.
annot.data['label_subclass'] = annot.data['label_subclass'].replace('nan', '')

D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-MSS-MorayFirth-201607_181-Helmsdale\noise\Annotations_dataset_UK-MSS-MorayFirth-201607_181-Helmsdale annotations.nc


Cannot find the ecCodes library


Duplicate entries removed: 0
Integrity test succesfull
4008 annotations imported.
D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-MSS-MorayFirth-201806_274-Helmsdale\Annotations_dataset_UK-MSS-MorayFirth-201806_274-Helmsdale annotations.nc
Duplicate entries removed: 0
Integrity test succesfull
18 annotations imported.
D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-MSS-MorayFirth-201806_274-Helmsdale\noise\Annotations_dataset_UK-MSS-MorayFirth-201806_274-Helmsdale annotations.nc
Duplicate entries removed: 0
Integrity test succesfull
121 annotations imported.
D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-MSS-MorayFirth-201904_360-Helmsdale\Annotations_dataset_UK-MSS-MorayFirth-201904_360-Helmsdale annotations.nc
Duplicate entries removed: 0
Integrity test succesfull
54 annotations imported.
D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-MSS-MorayFirth-201904_360-Helmsdale\noise\Ann

Duplicate entries removed: 0
Integrity test succesfull
9 annotations imported.
D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-UAberdeen-MorayFirth-202010_1137-112\noise\Annotations_dataset_UK-UAberdeen-MorayFirth-202010_1137-112 annotations.nc
Duplicate entries removed: 0
Integrity test succesfull
81 annotations imported.
D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-UAberdeen-MorayFirth-202101_1136-164\Annotations_dataset_UK-UAberdeen-MorayFirth-202101_1136-164 annotations.nc
Duplicate entries removed: 0
Integrity test succesfull
108 annotations imported.
D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-UAberdeen-MorayFirth-202101_1136-164\noise\Annotations_dataset_UK-UAberdeen-MorayFirth-202101_1136-164 annotations.nc
Duplicate entries removed: 0
Integrity test succesfull
37 annotations imported.
D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\UK-UAberdeen-MorayFirth-202101_1137-112

D:\NOAA\2022_Minke_whale_detector\manual_annotations\discrete_datasets\USA-NEFSC_CARIBBEAN_201612_MTQ\Annotations_dataset_FRA-NEFSC-CARIBBEAN-201612-MTQ annotations.nc
Duplicate entries removed: 0
Integrity test succesfull
788 annotations imported.


In [5]:
# Summary of the merged annotations, printed as plain text
merged_summary = annot.summary()
print(merged_summary)

label_class                                HB     HK    MW     NN  Total
deployment_ID                                                           
FRA-NEFSC-CARIBBEAN-201612-MTQ            788      0     0      0    788
UK-MSS-MorayFirth-201607_181-Helmsdale      0      0     0   4008   4008
UK-MSS-MorayFirth-201806_274-Helmsdale      0      0    18    121    139
UK-MSS-MorayFirth-201904_360-Helmsdale      0      0    54    482    536
UK-MSS-MorayFirth-201908_421-Helmsdale      0      0   453   2346   2799
UK-SAMS-WestScotland-201711-StantonBank     0      0   762   1911   2673
UK-SAMS-WestScotland-202009-N1              0      0   563   8922   9485
UK-SAMS-WestScotland-202009-N2              0      0   104   4461   4565
UK-SAMS-WestScotland-202010-EL1             0      0   238   3885   4123
UK-SAMS-WestScotland-202010-S1              0      0     4   5327   5331
UK-UAberdeen-MorayFirth-201709_887-162      0      0     0     18     18
UK-UAberdeen-MorayFirth-201904_1027-235     0      

## Update audio file paths

In [6]:
# Update the audio file paths of the annotations using "root_dir".
# NOTE(review): presumably this re-points each annotation to its audio file
# under root_dir; how files are matched is not visible here, so confirm
# against the ecosound documentation.
annot.update_audio_dir(root_dir)

## Summary of the dataset

In [7]:
# Summary of the annotation dataset, printed as plain text
final_summary = annot.summary()
print(final_summary)

label_class                                HB     HK    MW     NN  Total
deployment_ID                                                           
FRA-NEFSC-CARIBBEAN-201612-MTQ            788      0     0      0    788
UK-MSS-MorayFirth-201607_181-Helmsdale      0      0     0   4008   4008
UK-MSS-MorayFirth-201806_274-Helmsdale      0      0    18    121    139
UK-MSS-MorayFirth-201904_360-Helmsdale      0      0    54    482    536
UK-MSS-MorayFirth-201908_421-Helmsdale      0      0   453   2346   2799
UK-SAMS-WestScotland-201711-StantonBank     0      0   762   1911   2673
UK-SAMS-WestScotland-202009-N1              0      0   563   8922   9485
UK-SAMS-WestScotland-202009-N2              0      0   104   4461   4565
UK-SAMS-WestScotland-202010-EL1             0      0   238   3885   4123
UK-SAMS-WestScotland-202010-S1              0      0     4   5327   5331
UK-UAberdeen-MorayFirth-201709_887-162      0      0     0     18     18
UK-UAberdeen-MorayFirth-201904_1027-235     0      

## Save master annotation dataset

In [8]:
# Save the merged annotations as a netCDF file in "root_dir". The file name is
# made of the label classes joined with "-" and a timestamp of the current
# local time, e.g. Annotations_dataset_<labels>_<YYYYmmddTHHMMSS>.nc
labels_tag = "-".join(annot.get_labels_class())
time_tag = datetime.today().strftime('%Y%m%dT%H%M%S')
out_name = 'Annotations_dataset_' + labels_tag + '_' + time_tag + '.nc'
annot.to_netcdf(os.path.join(root_dir, out_name))