# Creation of annotations from unannotated noise recordings


## Purpose of this notebook
This notebook describes the steps involved in automatically creating noise annotations from non-annotated noise recordings. It is used to create noise annotations from data provided by the University of Aberdeen in Scotland.

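Annotations are made by breaking down each recording into adjacent annotations of a given duration until the end of the file is reached. If the last segment is shorter than the requested duration, it is merged with the previous annotation. The minimum and maximum frequencies of the annotations are 0 Hz and the Nyquist frequency, respectively.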

## Deployment folders

The data provided were separated into folders corresponding to different deployments. As a result, 7 folders were created:

- UK-UAberdeen-MorayFirth-201904_986-110
- UK-UAberdeen-MorayFirth-201904_1027-235
- UK-UAberdeen-MorayFirth-201904_1029-237
- UK-UAberdeen-MorayFirth-202001_1092-112
- UK-UAberdeen-MorayFirth-202001_1093-164
- UK-UAberdeen-MorayFirth-202101_1136-164
- UK-UAberdeen-MorayFirth-202101_1137-112

A deployment_info.csv file was created in each of these folders and contains the metadata for each deployment.

![noise_scotland_folders.png](img/noise_scotland_folders.png)


## Import libraries and define functions used throughout

In [134]:
from ecosound.core.annotation import Annotation
from ecosound.core.metadata import DeploymentInfo
from ecosound.core.audiotools import Sound
from ecosound.core.tools import filename_to_datetime
import ecosound.core.decorators
import os
import re
import pandas as pd
import numpy as np
import uuid
from datetime import datetime

@ecosound.core.decorators.listinput
def filename_to_datetime2(files):
    """Extract timestamps from file names containing ``_YYMMDD_HHMMSS.``.

    Example of a matching file name: ``Depl181_Helmsdale_161113_013327.wav``.

    Parameters
    ----------
    files : list of str
        File names to parse. NOTE(review): the ``listinput`` decorator
        presumably wraps a single string into a list - confirm in ecosound.

    Returns
    -------
    list of datetime.datetime
        One timestamp per entry of ``files``, in the same order.

    Raises
    ------
    ValueError
        If a file name does not contain the expected pattern, or if the
        matched digits do not form a valid date/time.
    """
    # The trailing dot is escaped so that only a literal "." is accepted.
    # An unescaped "." matches any character, which lets the regex select a
    # substring that strptime then rejects because it requires a literal ".".
    p = re.compile(r"_[0-9]{6}_[0-9]{6}\.")
    time_format = "_%y%m%d_%H%M%S."

    timestamps = []
    for file in files:
        datestr = p.search(file)
        if datestr is None:
            # Explicit error instead of the opaque "NoneType is not
            # subscriptable" raised when indexing a failed search.
            raise ValueError(
                "No '_YYMMDD_HHMMSS.' timestamp found in file name: "
                + str(file)
            )
        timestamps.append(datetime.strptime(datestr[0], time_format))
    return timestamps

@ecosound.core.decorators.listinput
def filename_to_datetime3(files):
    """Extract timestamps from file names containing ``_YYYYMMDD_HHMMSS``.

    Example of a matching file name: ``PAM_20181124_000003_000.wav``.

    Parameters
    ----------
    files : list of str
        File names to parse. NOTE(review): the ``listinput`` decorator
        presumably wraps a single string into a list - confirm in ecosound.

    Returns
    -------
    list of datetime.datetime
        One timestamp per entry of ``files``, in the same order.

    Raises
    ------
    ValueError
        If a file name does not contain the expected pattern, or if the
        matched digits do not form a valid date/time.
    """
    p = re.compile(r"_[0-9]{8}_[0-9]{6}")
    time_format = "_%Y%m%d_%H%M%S"

    timestamps = []
    for file in files:
        datestr = p.search(file)
        if datestr is None:
            # Explicit error instead of the opaque "NoneType is not
            # subscriptable" raised when indexing a failed search.
            raise ValueError(
                "No '_YYYYMMDD_HHMMSS' timestamp found in file name: "
                + str(file)
            )
        timestamps.append(datetime.strptime(datestr[0], time_format))
    return timestamps

@ecosound.core.decorators.listinput
def filename_to_datetime4(files):
    """Extract timestamps from file names containing ``_YYYY-MM-DD_HH-MM-SS``.

    Example of a matching file name: ``channelA_2021-05-08_20-00-04.wav``.

    Parameters
    ----------
    files : list of str
        File names to parse. NOTE(review): the ``listinput`` decorator
        presumably wraps a single string into a list - confirm in ecosound.

    Returns
    -------
    list of datetime.datetime
        One timestamp per entry of ``files``, in the same order.

    Raises
    ------
    ValueError
        If a file name does not contain the expected pattern, or if the
        matched digits do not form a valid date/time.
    """
    p = re.compile(
        r"_[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{2}-[0-9]{2}-[0-9]{2}"
    )
    time_format = "_%Y-%m-%d_%H-%M-%S"

    timestamps = []
    for file in files:
        datestr = p.search(file)
        if datestr is None:
            # Explicit error instead of the opaque "NoneType is not
            # subscriptable" raised when indexing a failed search.
            raise ValueError(
                "No '_YYYY-MM-DD_HH-MM-SS' timestamp found in file name: "
                + str(file)
            )
        timestamps.append(datetime.strptime(datestr[0], time_format))
    return timestamps

def _parse_file_timestamp(file):
    """Parse the start date/time of an audio file from its name.

    Tries each known file-name parser in turn and returns the result of the
    first one that succeeds.

    Raises
    ------
    ValueError
        If none of the parsers can extract a timestamp from ``file``.
    """
    parsers = (
        filename_to_datetime,
        filename_to_datetime2,
        filename_to_datetime3,
        filename_to_datetime4,
    )
    last_error = None
    for parser in parsers:
        try:
            return parser(file)
        except Exception as err:
            # A failure here only means "wrong file-name format": try the
            # next parser. KeyboardInterrupt/SystemExit are not caught.
            last_error = err
    raise ValueError(
        "Could not extract a timestamp from file name: {}".format(file)
    ) from last_error


def _segment_offsets(file_dur, annot_dur_sec):
    """Return start and end offsets (s) of adjacent segments covering a file.

    Segments are ``annot_dur_sec`` long. If the last segment would be shorter
    than ``annot_dur_sec``, it is merged into the previous one, so the final
    segment then runs up to ``file_dur`` and is longer than ``annot_dur_sec``.
    A file shorter than ``annot_dur_sec`` yields a single segment covering
    the whole file.
    """
    t1 = np.arange(0, file_dur, annot_dur_sec)
    t2 = np.append(t1[1:], file_dur)
    if len(t1) > 1 and t2[-1] - t1[-1] < annot_dur_sec:
        t1 = np.delete(t1, -1)  # drop the start of the short last segment
        t2 = np.delete(t2, -2)  # drop the end of the second-to-last segment
    return t1, t2


def create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass, operator_name='xavier'):
    """Create adjacent, fixed-duration noise annotations for a folder.

    Every file in ``audio_dir`` whose name ends with ``file_ext`` is split
    into adjacent annotations of ``annot_dur_sec`` seconds spanning 0 Hz to
    half the file's sampling frequency. The annotations of all files are
    concatenated into a single Annotation object.

    Parameters
    ----------
    audio_dir : str
        Folder containing the audio files and the deployment file.
    deployment_file : str
        Name of the deployment metadata file located in ``audio_dir``.
    file_ext : str
        Only files whose name ends with this string are processed.
    annot_dur_sec : float
        Duration of each annotation, in seconds. Must be positive.
    label_class : str
        Value written to the ``label_class`` field of every annotation.
    label_subclass : str
        Value written to the ``label_subclass`` field of every annotation.
    operator_name : str, optional
        Value written to the ``operator_name`` field. Defaults to 'xavier'.

    Returns
    -------
    Annotation
        Concatenation of the annotations created for all processed files.

    Raises
    ------
    ValueError
        If ``annot_dur_sec`` is not positive, if a file name does not match
        any supported timestamp format, or if no annotation could be created
        (e.g. no file with the requested extension in ``audio_dir``).
    """
    if annot_dur_sec <= 0:
        raise ValueError("annot_dur_sec must be a positive number of seconds.")

    annot_stack = []
    for file in os.listdir(audio_dir):
        if not file.endswith(file_ext):
            continue
        print(file)

        # retrieve file start date and time
        file_timestamp = _parse_file_timestamp(file)

        # retrieve file duration
        audio = Sound(os.path.join(audio_dir, file))
        file_dur = audio.file_duration_sec
        if file_dur <= 0:
            # No segment can be defined for an empty recording (the start
            # and end offset arrays would have different lengths).
            print("Skipping empty audio file: " + file)
            continue

        # annotation start/end times, relative to the beginning of the file
        t1, t2 = _segment_offsets(file_dur, annot_dur_sec)

        # create the annotation object
        annot = Annotation()
        annot.data['time_min_offset'] = t1
        annot.data['time_max_offset'] = t2
        annot.insert_values(audio_file_start_date=file_timestamp[0])
        annot.data['time_min_date'] = pd.to_datetime(
            annot.data['audio_file_start_date'] + pd.to_timedelta(
                annot.data['time_min_offset'], unit='s'))
        annot.data['time_max_date'] = pd.to_datetime(
            annot.data['audio_file_start_date'] +
            pd.to_timedelta(annot.data['time_max_offset'], unit='s'))
        annot.insert_values(audio_channel=1)
        annot.insert_values(audio_file_name=os.path.splitext(os.path.basename(file))[0])
        annot.insert_values(audio_file_dir=audio_dir)
        annot.insert_values(audio_file_extension=os.path.splitext(file)[1])
        annot.insert_values(frequency_min=0)
        annot.insert_values(software_version=0)
        annot.insert_values(operator_name=operator_name)
        annot.insert_values(entry_date=datetime.now())
        # upper frequency bound is the Nyquist frequency of the recording
        annot.insert_values(frequency_max=audio.file_sampling_frequency/2)
        annot.insert_values(label_class=label_class)
        annot.insert_values(label_subclass=label_subclass)
        annot.insert_values(from_detector=False)
        annot.insert_values(software_name='custom_python')
        # one unique identifier per annotation (row)
        annot.data['uuid'] = annot.data.apply(lambda _: str(uuid.uuid4()), axis=1)
        annot.data['duration'] = annot.data['time_max_offset'] - annot.data['time_min_offset']
        # add metadata from the deployment file
        annot.insert_metadata(os.path.join(audio_dir, deployment_file))

        # stack annotations for each file
        annot_stack.append(annot)
        # check that everything looks fine
        annot.check_integrity(verbose=False, ignore_frequency_duplicates=True)

    if not annot_stack:
        raise ValueError(
            "No annotations created: no usable file ending with '{}' "
            "found in {}".format(file_ext, audio_dir)
        )

    # concatenate all annotations
    annot_concat = annot_stack[0]
    for file_annot in annot_stack[1:]:
        annot_concat = annot_concat + file_annot
    annot_concat.check_integrity(verbose=False, ignore_frequency_duplicates=True)
    return annot_concat

### Dataset 1: UK-UAberdeen-MorayFirth-201904_986-110

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [31]:
audio_dir = r'C:\Users\xavier.mouy\Documents\GitHub\minke-whale-dataset\datasets\UK-UAberdeen-MorayFirth-201904_986-110'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [32]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl986_1678036995.190402110017.wav
Depl986_1678036995.190406225930.wav
Depl986_1678036995.190410165901.wav


Let's look at the summary of annotations that were created:

In [33]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-201904_986-110,90,90
Total,90,90


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [None]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

Here is what the annotations look like in Raven:

![noiseScotland.png](img/noiseScotland.png)


### Dataset 2: UK-UAberdeen-MorayFirth-201904_1027-235

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [34]:
audio_dir = r'C:\Users\xavier.mouy\Documents\GitHub\minke-whale-dataset\datasets\UK-UAberdeen-MorayFirth-201904_1027-235'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [35]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1027_1677725722.190403115956.wav
Depl1027_1677725722.190411055855.wav
Depl1027_1677725722.190415235822.wav


Let's look at the summary of annotations that were created:

In [36]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-201904_1027-235,90,90
Total,90,90


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [6]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 3: UK-UAberdeen-MorayFirth-201904_1029-237

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [7]:
audio_dir = r'C:\Users\xavier.mouy\Documents\GitHub\minke-whale-dataset\datasets\UK-UAberdeen-MorayFirth-201904_1029-237'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [8]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1029_134541352.190403235927.wav
Depl1029_134541352.190404175922.wav
Depl1029_134541352.190409115847.wav


Let's look at the summary of annotations that were created:

In [9]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-201904_1029-237,90,90
Total,90,90


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [10]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 4: UK-UAberdeen-MorayFirth-202001_1092-112 (seismic)

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [11]:
audio_dir = r'C:\Users\xavier.mouy\Documents\GitHub\minke-whale-dataset\datasets\UK-UAberdeen-MorayFirth-202001_1092-112'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = 'S' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [14]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1092_1678036995.200101014914.wav
Depl1092_1678036995.200104224914.wav
Depl1092_1678036995.200104234914.wav
Depl1092_1678036995.200111084914.wav
Depl1092_1678036995.200119004914.wav
Depl1092_1678036995.200119034914.wav
Depl1092_1678036995.200121014914.wav
Depl1092_1678036995.200121214914.wav
Depl1092_1678036995.200124014914.wav
Depl1092_1678036995.200124164914.wav
Depl1092_1678036995.200125184914.wav
Depl1092_1678036995.200125214914.wav
Depl1092_1678036995.200128064914.wav
Depl1092_1678036995.200128134914.wav
Depl1092_1678036995.200128144914.wav
Depl1092_1678036995.200201214914.wav
Depl1092_1678036995.200204224914.wav
Depl1092_1678036995.200206004914.wav
Depl1092_1678036995.200213004914.wav
Depl1092_1678036995.200213024914.wav
Depl1092_1678036995.200213084914.wav
Depl1092_1678036995.200226104914.wav
Depl1092_1678036995.200226124914.wav
Depl1092_1678036995.200227004914.wav


Let's look at the summary of annotations that were created:

In [15]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202001_1092-112,216,216
Total,216,216


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [16]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 5: UK-UAberdeen-MorayFirth-202001_1093-164 (seismic)

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [17]:
audio_dir = r'C:\Users\xavier.mouy\Documents\GitHub\minke-whale-dataset\datasets\UK-UAberdeen-MorayFirth-202001_1093-164'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = 'S' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [18]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1093_1677725722.200104205913.wav
Depl1093_1677725722.200110095913.wav
Depl1093_1677725722.200110115913.wav
Depl1093_1677725722.200111205913.wav
Depl1093_1677725722.200119035913.wav
Depl1093_1677725722.200121195913.wav
Depl1093_1677725722.200121235913.wav
Depl1093_1677725722.200123235913.wav
Depl1093_1677725722.200124025913.wav
Depl1093_1677725722.200124165913.wav
Depl1093_1677725722.200126065913.wav
Depl1093_1677725722.200126095913.wav
Depl1093_1677725722.200128135913.wav
Depl1093_1677725722.200130015913.wav
Depl1093_1677725722.200131095913.wav
Depl1093_1677725722.200201185913.wav
Depl1093_1677725722.200202025913.wav
Depl1093_1677725722.200204195913.wav
Depl1093_1677725722.200205085913.wav
Depl1093_1677725722.200205095913.wav
Depl1093_1677725722.200205235913.wav
Depl1093_1677725722.200206015913.wav
Depl1093_1677725722.200213005913.wav
Depl1093_1677725722.200213015913.wav
Depl1093_1677725722.200226235913.wav


Let's look at the summary of annotations that were created:

In [19]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202001_1093-164,225,225
Total,225,225


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [20]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 6: UK-UAberdeen-MorayFirth-202101_1136-164

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [22]:
audio_dir = r'C:\Users\xavier.mouy\Documents\GitHub\minke-whale-dataset\datasets\UK-UAberdeen-MorayFirth-202101_1136-164'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [23]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1136_1677725722.210102130002.wav
Depl1136_1677725722.210103230002.wav
Depl1136_1677725722.210105030002.wav
Depl1136_1677725722.210105110002.wav
Depl1136_1677725722.210119110002.wav
Depl1136_1677725722.210119180002.wav
Depl1136_1677725722.210208180002.wav
Depl1136_1677725722.210216140002.wav
Depl1136_1677725722.210216170002.wav
Depl1136_1677725722.210217150002.wav
Depl1136_1677725722.210220090002.wav
Depl1136_1677725722.210221010002.wav


Let's look at the summary of annotations that were created:

In [24]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202101_1136-164,108,108
Total,108,108


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [25]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 7: UK-UAberdeen-MorayFirth-202101_1137-112

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [27]:
audio_dir = r'C:\Users\xavier.mouy\Documents\GitHub\minke-whale-dataset\datasets\UK-UAberdeen-MorayFirth-202101_1137-112'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [28]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1137_1678508072.210107040002.wav
Depl1137_1678508072.210108160002.wav
Depl1137_1678508072.210113150002.wav
Depl1137_1678508072.210114040002.wav
Depl1137_1678508072.210116170002.wav
Depl1137_1678508072.210119040002.wav
Depl1137_1678508072.210122000002.wav
Depl1137_1678508072.210123040002.wav
Depl1137_1678508072.210123120002.wav
Depl1137_1678508072.210208160002.wav
Depl1137_1678508072.210211200002.wav
Depl1137_1678508072.210213110002.wav


Let's look at the summary of annotations that were created:

In [29]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202101_1137-112,108,108
Total,108,108


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [30]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 8: UK-MSS-MorayFirth-201607_181-Helmsdale

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [55]:
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-MSS-MorayFirth-201607_181-Helmsdale\noise'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [61]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl181_Helmsdale_161113_013327.wav
Depl181_Helmsdale_160731_140000.wav
Depl181_Helmsdale_160731_153000.wav
Depl181_Helmsdale_160731_163000.wav
Depl181_Helmsdale_160731_180000.wav
Depl181_Helmsdale_160731_193000.wav
Depl181_Helmsdale_160801_080000.wav
Depl181_Helmsdale_160801_093000.wav
Depl181_Helmsdale_160801_173000.wav
Depl181_Helmsdale_160802_080000.wav
Depl181_Helmsdale_160802_083000.wav
Depl181_Helmsdale_160802_130000.wav
Depl181_Helmsdale_160802_160000.wav
Depl181_Helmsdale_160802_163000.wav
Depl181_Helmsdale_160802_190000.wav
Depl181_Helmsdale_160803_150000.wav
Depl181_Helmsdale_160804_000000.wav
Depl181_Helmsdale_160804_100000.wav
Depl181_Helmsdale_160804_103000.wav
Depl181_Helmsdale_160804_123000.wav
Depl181_Helmsdale_160804_130000.wav
Depl181_Helmsdale_160804_150000.wav
Depl181_Helmsdale_160805_013000.wav
Depl181_Helmsdale_160806_113000.wav
Depl181_Helmsdale_160809_063000.wav
Depl181_Helmsdale_160809_113000.wav
Depl181_Helmsdale_160809_200000.wav
Depl181_Helmsdale_160810_073

Depl181_Helmsdale_161004_213000.wav
Depl181_Helmsdale_161004_230000.wav
Depl181_Helmsdale_161006_120000.wav
Depl181_Helmsdale_161006_133000.wav
Depl181_Helmsdale_161006_203000.wav
Depl181_Helmsdale_161006_220000.wav
Depl181_Helmsdale_161007_090000.wav
Depl181_Helmsdale_161007_093000.wav
Depl181_Helmsdale_161007_100000.wav
Depl181_Helmsdale_161007_110000.wav
Depl181_Helmsdale_161007_143000.wav
Depl181_Helmsdale_161007_150000.wav
Depl181_Helmsdale_161007_173000.wav
Depl181_Helmsdale_161007_193000.wav
Depl181_Helmsdale_161008_120000.wav
Depl181_Helmsdale_161009_070000.wav
Depl181_Helmsdale_161009_073000.wav
Depl181_Helmsdale_161009_080000.wav
Depl181_Helmsdale_161009_093000.wav
Depl181_Helmsdale_161009_130000.wav
Depl181_Helmsdale_161009_140000.wav
Depl181_Helmsdale_161010_213000.wav
Depl181_Helmsdale_161011_013000.wav
Depl181_Helmsdale_161011_120000.wav
Depl181_Helmsdale_161011_203000.wav
Depl181_Helmsdale_161011_230000.wav
Depl181_Helmsdale_161011_233000.wav
Depl181_Helmsdale_161012_000

Let's look at the summary of annotations that were created:

In [57]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-MSS-MorayFirth-201607_181-Helmsdale,4008,4008
Total,4008,4008


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [58]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 9: UK-MSS-MorayFirth-201806_274-Helmsdale

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [62]:
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-MSS-MorayFirth-201806_274-Helmsdale\noise'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [63]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl274_Helmsdale_180618_143000.wav
Depl274_Helmsdale_180601_140000.wav
Depl274_Helmsdale_180602_060000.wav
Depl274_Helmsdale_180602_063000.wav
Depl274_Helmsdale_180602_073000.wav
Depl274_Helmsdale_180602_090000.wav
Depl274_Helmsdale_180602_093000.wav
Depl274_Helmsdale_180602_103000.wav
Depl274_Helmsdale_180602_170000.wav
Depl274_Helmsdale_180602_220000.wav
Depl274_Helmsdale_180602_230000.wav
Depl274_Helmsdale_180603_000000.wav
Depl274_Helmsdale_180603_010000.wav


Let's look at the summary of annotations that were created:

In [64]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-MSS-MorayFirth-201806_274-Helmsdale,121,121
Total,121,121


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [65]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 10: UK-MSS-MorayFirth-201904_360-Helmsdale

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [66]:
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-MSS-MorayFirth-201904_360-Helmsdale\noise'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [67]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl360_Helmsdale_190731_160000.wav
Depl360_Helmsdale_190601_143000.wav
Depl360_Helmsdale_190601_200000.wav
Depl360_Helmsdale_190601_203000.wav
Depl360_Helmsdale_190601_213000.wav
Depl360_Helmsdale_190602_190000.wav
Depl360_Helmsdale_190606_033000.wav
Depl360_Helmsdale_190609_213000.wav
Depl360_Helmsdale_190609_230000.wav
Depl360_Helmsdale_190610_013000.wav
Depl360_Helmsdale_190610_073000.wav
Depl360_Helmsdale_190610_120000.wav
Depl360_Helmsdale_190610_130000.wav
Depl360_Helmsdale_190610_223000.wav
Depl360_Helmsdale_190610_233000.wav
Depl360_Helmsdale_190611_000000.wav
Depl360_Helmsdale_190613_033000.wav
Depl360_Helmsdale_190614_050000.wav
Depl360_Helmsdale_190614_190000.wav
Depl360_Helmsdale_190617_083000.wav
Depl360_Helmsdale_190618_023000.wav
Depl360_Helmsdale_190618_120000.wav
Depl360_Helmsdale_190619_093000.wav
Depl360_Helmsdale_190619_100000.wav
Depl360_Helmsdale_190620_213000.wav
Depl360_Helmsdale_190620_220000.wav
Depl360_Helmsdale_190622_033000.wav
Depl360_Helmsdale_190622_210

Let's look at the summary of annotations that were created:

In [68]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-MSS-MorayFirth-201904_360-Helmsdale,482,482
Total,482,482


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [69]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 11: UK-MSS-MorayFirth-201908_421-Helmsdale

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [90]:
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-MSS-MorayFirth-201908_421-Helmsdale\noise'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [91]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl421_Helmsdale_191031_221000.wav
Depl421_Helmsdale_190901_221000.wav
Depl421_Helmsdale_190902_111000.wav
Depl421_Helmsdale_190902_114000.wav
Depl421_Helmsdale_190902_121000.wav
Depl421_Helmsdale_190902_124000.wav
Depl421_Helmsdale_190902_131000.wav
Depl421_Helmsdale_190902_164000.wav
Depl421_Helmsdale_190902_201000.wav
Depl421_Helmsdale_190902_204000.wav
Depl421_Helmsdale_190902_211000.wav
Depl421_Helmsdale_190902_214000.wav
Depl421_Helmsdale_190902_221000.wav
Depl421_Helmsdale_190902_224000.wav
Depl421_Helmsdale_190902_231000.wav
Depl421_Helmsdale_190902_234000.wav
Depl421_Helmsdale_190903_001000.wav
Depl421_Helmsdale_190903_021000.wav
Depl421_Helmsdale_190903_024000.wav
Depl421_Helmsdale_190903_034000.wav
Depl421_Helmsdale_190903_234000.wav
Depl421_Helmsdale_190904_064000.wav
Depl421_Helmsdale_190904_104000.wav
Depl421_Helmsdale_190904_111000.wav
Depl421_Helmsdale_190904_114000.wav
Depl421_Helmsdale_190904_121000.wav
Depl421_Helmsdale_190904_124000.wav
Depl421_Helmsdale_190905_121

Depl421_Helmsdale_191029_101000.wav
Depl421_Helmsdale_191029_104000.wav
Depl421_Helmsdale_191029_151000.wav
Depl421_Helmsdale_191029_154000.wav
Depl421_Helmsdale_191029_161000.wav
Depl421_Helmsdale_191029_204000.wav
Depl421_Helmsdale_191029_211000.wav
Depl421_Helmsdale_191029_214000.wav
Depl421_Helmsdale_191029_221000.wav
Depl421_Helmsdale_191029_231000.wav
Depl421_Helmsdale_191030_074000.wav
Depl421_Helmsdale_191030_081000.wav
Depl421_Helmsdale_191030_084000.wav
Depl421_Helmsdale_191030_091000.wav
Depl421_Helmsdale_191030_094000.wav
Depl421_Helmsdale_191030_101000.wav
Depl421_Helmsdale_191030_104000.wav
Depl421_Helmsdale_191030_111000.wav
Depl421_Helmsdale_191030_161000.wav
Depl421_Helmsdale_191030_231000.wav
Depl421_Helmsdale_191031_004000.wav
Depl421_Helmsdale_191031_214000.wav


Let's look at the summary of annotations that were created:

In [92]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-MSS-MorayFirth-201908_421-Helmsdale,2346,2346
Total,2346,2346


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [93]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 12: UK-SAMS-WestScotland-201711-StantonBank

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [94]:
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-SAMS-WestScotland-201711-StantonBank\Noise'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [95]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

PAM_20180321_000004_000_noise_end.wav
PAM_20181124_000003_000.wav
PAM_20181124_010003_000.wav
PAM_20181124_020003_000.wav
PAM_20181124_030003_000.wav
PAM_20181124_040003_000.wav
PAM_20181124_050003_000.wav
PAM_20181124_060003_000.wav
PAM_20181124_070003_000.wav
PAM_20181124_080003_000.wav
PAM_20181124_090003_000.wav
PAM_20181124_100003_000.wav
PAM_20181124_110003_000.wav
PAM_20181124_120003_000.wav
PAM_20181124_130003_000.wav
PAM_20181124_140003_000.wav
PAM_20181124_150003_000.wav
PAM_20181124_160003_000.wav
PAM_20181124_170003_000.wav
PAM_20181124_180003_000.wav
PAM_20181124_190003_000.wav
PAM_20181124_200003_000.wav
PAM_20181124_210003_000.wav
PAM_20181124_220003_000.wav
PAM_20181124_230003_000.wav
PAM_20181225_000003_000.wav
PAM_20181225_010003_000.wav
PAM_20181225_020003_000.wav
PAM_20181225_030003_000.wav
PAM_20181225_040003_000.wav
PAM_20181225_050003_000.wav
PAM_20181225_060003_000.wav
PAM_20181225_070003_000.wav
PAM_20181225_080003_000.wav
PAM_20181225_090003_000.wav
PAM_201812

Let's look at the summary of annotations that were created:

In [96]:
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-SAMS-WestScotland-201711-StantonBank,1911,1911
Total,1911,1911


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [97]:
annot.to_netcdf(os.path.join(audio_dir, 'Annotations_dataset_' + annot.data['deployment_ID'][0] +' annotations.nc'))
annot.to_raven(audio_dir, outfile='Annotations_dataset_' + annot.data['deployment_ID'][0] +'.Table.1.selections.txt', single_file=True)

### Dataset 13: UK-SAMS-WestScotland-202009-N1

Definition of all the paths of all folders with the raw annotation and audio files for this deployment.

In [108]:
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-SAMS-WestScotland-202009-N1\Noise'
deployment_file = r'deployment_info.csv' 
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [109]:
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

channelA_2020-10-19_00-00-04_noise_end.wav
channelA_2020-12-12_00-00-05.wav
channelA_2020-12-12_01-00-05.wav
channelA_2020-12-12_02-00-05.wav
channelA_2020-12-12_03-00-05.wav
channelA_2020-12-12_04-00-05.wav
channelA_2020-12-12_05-00-05.wav
channelA_2020-12-12_06-00-05.wav
channelA_2020-12-12_07-00-05.wav
channelA_2020-12-12_08-00-05.wav
channelA_2020-12-12_09-00-05.wav
channelA_2020-12-12_10-00-05.wav
channelA_2020-12-12_11-00-05.wav
channelA_2020-12-12_12-00-05.wav
channelA_2020-12-12_13-00-05.wav
channelA_2020-12-12_14-00-05.wav
channelA_2020-12-12_15-00-05.wav
channelA_2020-12-12_16-00-05.wav
channelA_2020-12-12_17-00-05.wav
channelA_2020-12-12_18-00-05.wav
channelA_2020-12-12_19-00-05.wav
channelA_2020-12-12_20-00-05.wav
channelA_2020-12-12_21-00-05.wav
channelA_2020-12-12_22-00-05.wav
channelA_2020-12-12_23-00-05.wav
channelA_2020-12-18_00-00-04.wav
channelA_2020-12-18_01-00-03.wav
channelA_2020-12-18_02-00-03.wav
channelA_2020-12-18_03-00-03.wav
channelA_2020-12-18_04-00-03.wav


channelA_2021-05-03_21-00-04.wav
channelA_2021-05-03_22-00-04.wav
channelA_2021-05-03_23-00-04.wav
channelA_2021-05-04_00-00-04.wav
channelA_2021-05-04_01-00-04.wav
channelA_2021-05-04_02-00-04.wav
channelA_2021-05-04_03-00-04.wav
channelA_2021-05-04_04-00-04.wav
channelA_2021-05-04_05-00-04.wav
channelA_2021-05-04_06-00-04.wav
channelA_2021-05-04_07-00-04.wav
channelA_2021-05-04_08-00-04.wav
channelA_2021-05-04_09-00-04.wav
channelA_2021-05-04_10-00-04.wav
channelA_2021-05-04_11-00-04.wav
channelA_2021-05-04_12-00-04.wav
channelA_2021-05-04_13-00-04.wav
channelA_2021-05-04_14-00-04.wav
channelA_2021-05-04_15-00-04.wav
channelA_2021-05-04_16-00-04.wav
channelA_2021-05-04_17-00-04.wav
channelA_2021-05-04_18-00-04.wav
channelA_2021-05-04_19-00-04.wav
channelA_2021-05-04_20-00-04.wav
channelA_2021-05-04_21-00-04.wav
channelA_2021-05-04_22-00-04.wav
channelA_2021-05-04_23-00-04.wav
channelA_2021-05-05_00-00-04.wav
channelA_2021-05-05_01-00-04.wav
channelA_2021-05-05_02-00-04.wav
channelA_2

Let's look at the summary of annotations that were created:

In [110]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-SAMS-WestScotland-202009-N1,8922,8922
Total,8922,8922


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [111]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 14: UK-SAMS-WestScotland-202009-N2

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [112]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-SAMS-WestScotland-202009-N2\Noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [113]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

channelA_2020-12-30_00-00-04.wav
channelA_2020-12-30_01-00-03.wav
channelA_2020-12-30_02-00-03.wav
channelA_2020-12-30_03-00-04.wav
channelA_2020-12-30_04-00-03.wav
channelA_2020-12-30_05-00-03.wav
channelA_2020-12-30_06-00-03.wav
channelA_2020-12-30_07-00-03.wav
channelA_2020-12-30_08-00-03.wav
channelA_2020-12-30_09-00-03.wav
channelA_2020-12-30_10-00-03.wav
channelA_2020-12-30_11-00-03.wav
channelA_2020-12-30_12-00-04.wav
channelA_2020-12-30_13-00-03.wav
channelA_2020-12-30_14-00-03.wav
channelA_2020-12-30_15-00-03.wav
channelA_2020-12-30_16-00-04.wav
channelA_2020-12-30_17-00-03.wav
channelA_2020-12-30_18-00-03.wav
channelA_2020-12-30_19-00-04.wav
channelA_2020-12-30_20-00-03.wav
channelA_2020-12-30_21-00-03.wav
channelA_2020-12-30_22-00-03.wav
channelA_2020-12-30_23-00-04.wav
channelA_2021-01-11_00-00-04.wav
channelA_2021-01-11_01-00-04.wav
channelA_2021-01-11_02-00-04.wav
channelA_2021-01-11_03-00-04.wav
channelA_2021-01-11_04-00-04.wav
channelA_2021-01-11_05-00-04.wav
channelA_2

Let's look at the summary of annotations that were created:

In [114]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-SAMS-WestScotland-202009-N2,4461,4461
Total,4461,4461


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [115]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 15: UK-SAMS-WestScotland-202010-EL1

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [116]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-SAMS-WestScotland-202010-EL1\Noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [117]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

channelA_2020-11-17_00-00-04_noise_end.wav
channelA_2020-12-26_00-00-05.wav
channelA_2020-12-26_01-00-04.wav
channelA_2020-12-26_02-00-05.wav
channelA_2020-12-26_03-00-04.wav
channelA_2020-12-26_04-00-04.wav
channelA_2020-12-26_05-00-05.wav
channelA_2020-12-26_06-00-05.wav
channelA_2020-12-26_07-00-04.wav
channelA_2020-12-26_08-00-05.wav
channelA_2020-12-26_09-00-04.wav
channelA_2020-12-26_10-00-04.wav
channelA_2020-12-26_11-00-04.wav
channelA_2020-12-26_12-00-04.wav
channelA_2020-12-26_13-00-05.wav
channelA_2020-12-26_14-00-04.wav
channelA_2020-12-26_15-00-05.wav
channelA_2020-12-26_16-00-04.wav
channelA_2020-12-26_17-00-05.wav
channelA_2020-12-26_18-00-05.wav
channelA_2020-12-26_19-00-04.wav
channelA_2020-12-26_20-00-05.wav
channelA_2020-12-26_21-00-05.wav
channelA_2020-12-26_22-00-05.wav
channelA_2020-12-26_23-00-04.wav
channelA_2021-01-05_00-00-05.wav
channelA_2021-01-05_01-00-05.wav
channelA_2021-01-05_02-00-05.wav
channelA_2021-01-05_03-00-05.wav
channelA_2021-01-05_04-00-05.wav


Let's look at the summary of annotations that were created:

In [118]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-SAMS-WestScotland-202010-EL1,4027,4027
Total,4027,4027


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [119]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 16: UK-SAMS-WestScotland-202010-S1

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [120]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-SAMS-WestScotland-202010-S1\Noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [121]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

channelA_2020-10-23_00-00-04.wav
channelA_2020-10-23_01-00-04.wav
channelA_2020-10-23_02-00-03.wav
channelA_2020-10-23_03-00-03.wav
channelA_2020-10-23_04-00-04.wav
channelA_2020-10-23_05-00-03.wav
channelA_2020-10-23_06-00-03.wav
channelA_2020-10-23_07-00-03.wav
channelA_2020-10-23_08-00-03.wav
channelA_2020-10-23_09-00-03.wav
channelA_2020-10-23_10-00-03.wav
channelA_2020-10-23_11-00-03.wav
channelA_2020-10-23_12-00-04.wav
channelA_2020-10-23_13-00-03.wav
channelA_2020-10-23_14-00-03.wav
channelA_2020-10-23_15-00-03.wav
channelA_2020-10-23_16-00-04.wav
channelA_2020-10-23_17-00-03.wav
channelA_2020-10-23_18-00-03.wav
channelA_2020-10-23_19-00-03.wav
channelA_2020-10-23_20-00-03.wav
channelA_2020-10-23_21-00-03.wav
channelA_2020-10-23_22-00-03.wav
channelA_2020-10-23_23-00-03.wav
channelA_2020-11-28_00-00-04.wav
channelA_2020-11-28_01-00-04.wav
channelA_2020-11-28_02-00-04.wav
channelA_2020-11-28_03-00-04.wav
channelA_2020-11-28_04-00-04.wav
channelA_2020-11-28_05-00-04.wav
channelA_2

Let's look at the summary of annotations that were created:

In [122]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-SAMS-WestScotland-202010-S1,5327,5327
Total,5327,5327


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [123]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 17: UK-UAberdeen-MorayFirth-201709_887-162

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [135]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-201709_887-162\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [136]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl887_1678508072.170921200002.wav
Depl887_1678508072.170909060002.wav


Let's look at the summary of annotations that were created:

In [138]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-201709_887-162,18,18
Total,18,18


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [139]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 18: UK-UAberdeen-MorayFirth-201905_987-112

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [140]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-201905_987-112\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [141]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl987_1347451_190610231441.wav
Depl987_1347451_190505222502.wav
Depl987_1347451_190505225502.wav
Depl987_1347451_190505235501.wav
Depl987_1347451_190506002501.wav
Depl987_1347451_190506005501.wav
Depl987_1347451_190523032254.wav
Depl987_1347451_190607024508.wav


Let's look at the summary of annotations that were created:

In [142]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-201905_987-112,233,233
Total,233,233


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [143]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 19: UK-UAberdeen-MorayFirth-201905_1032-164

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [144]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-201905_1032-164\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [145]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1032_1678508072.190521165218.wav
Depl1032_1678508072.190401045951.wav
Depl1032_1678508072.190411225740.wav
Depl1032_1678508072.190415225713.wav
Depl1032_1678508072.190416105710.wav
Depl1032_1678508072.190418165650.wav
Depl1032_1678508072.190424165556.wav
Depl1032_1678508072.190425105552.wav
Depl1032_1678508072.190426045547.wav
Depl1032_1678508072.190429045528.wav
Depl1032_1678508072.190430105517.wav
Depl1032_1678508072.190505105434.wav
Depl1032_1678508072.190505165432.wav
Depl1032_1678508072.190505225431.wav
Depl1032_1678508072.190507165415.wav
Depl1032_1678508072.190509045359.wav
Depl1032_1678508072.190511045342.wav
Depl1032_1678508072.190517105252.wav
Depl1032_1678508072.190519165233.wav


Let's look at the summary of annotations that were created:

In [146]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-201905_1032-164,6821,6821
Total,6821,6821


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [147]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 20: UK-UAberdeen-MorayFirth-201911_1092-112

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [148]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-201911_1092-112\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [149]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1092_1678036995.200202024914.wav
Depl1092_1678036995.191106144914.wav
Depl1092_1678036995.191222074914.wav


Let's look at the summary of annotations that were created:

In [150]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-201911_1092-112,27,27
Total,27,27


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [151]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 21: UK-UAberdeen-MorayFirth-201911_1093-164

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [152]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-201911_1093-164\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [153]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1093_1677725722.191106155913.wav


Let's look at the summary of annotations that were created:

In [154]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-201911_1093-164,9,9
Total,9,9


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [155]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 22: UK-UAberdeen-MorayFirth-202006_1101-164

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [156]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-202006_1101-164\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [157]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1101_134541352_200717210002.wav
Depl1101_134541352_200607120002.wav
Depl1101_134541352_200608220002.wav
Depl1101_134541352_200609210002.wav
Depl1101_134541352_200610190002.wav
Depl1101_134541352_200617040002.wav
Depl1101_134541352_200618070002.wav
Depl1101_134541352_200620120002.wav
Depl1101_134541352_200621120002.wav
Depl1101_134541352_200621130002.wav


Let's look at the summary of annotations that were created:

In [158]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202006_1101-164,91,91
Total,91,91


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [159]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 23: UK-UAberdeen-MorayFirth-202006_1102-112

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [160]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-202006_1102-112\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [161]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1102_1678254119_200617040002.wav


Let's look at the summary of annotations that were created:

In [162]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202006_1102-112,9,9
Total,9,9


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [163]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 24: UK-UAberdeen-MorayFirth-202010_1137-112

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [164]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-202010_1137-112\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [165]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1137_1678508072_201230040002.wav
Depl1137_1678508072_201020080002.wav
Depl1137_1678508072_201022090002.wav
Depl1137_1678508072_201023080002.wav
Depl1137_1678508072_201023220002.wav
Depl1137_1678508072_201026100002.wav
Depl1137_1678508072_201031040002.wav
Depl1137_1678508072_201031150002.wav
Depl1137_1678508072_201031160002.wav


Let's look at the summary of annotations that were created:

In [166]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202010_1137-112,81,81
Total,81,81


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [167]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 25: UK-UAberdeen-MorayFirth-202101_1136-164

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [168]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-202101_1136-164\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [169]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1136_1677725722.210306060002.wav
Depl1136_1677725722.210111080002.wav
Depl1136_1677725722.210117130002.wav
Depl1136_1677725722.210225000002.wav


Let's look at the summary of annotations that were created:

In [170]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202101_1136-164,37,37
Total,37,37


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [171]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 26: UK-UAberdeen-MorayFirth-202105_1160-112

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [172]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-202105_1160-112\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [173]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1160_1678036995_210928140002.wav
Depl1160_1678036995_210502000002.wav
Depl1160_1678036995_210512200002.wav
Depl1160_1678036995_210513170002.wav
Depl1160_1678036995_210514090002.wav
Depl1160_1678036995_210518060002.wav
Depl1160_1678036995_210519160002.wav
Depl1160_1678036995_210522020002.wav
Depl1160_1678036995_210523020002.wav
Depl1160_1678036995_210525020002.wav
Depl1160_1678036995_210526190002.wav
Depl1160_1678036995_210531120002.wav
Depl1160_1678036995_210609190002.wav
Depl1160_1678036995_210613170002.wav
Depl1160_1678036995_210617060002.wav
Depl1160_1678036995_210701100002.wav
Depl1160_1678036995_210717050002.wav
Depl1160_1678036995_210720110002.wav
Depl1160_1678036995_210907110002.wav
Depl1160_1678036995_210908150002.wav
Depl1160_1678036995_210909140002.wav
Depl1160_1678036995_210910080002.wav
Depl1160_1678036995_210910120002.wav
Depl1160_1678036995_210911060002.wav
Depl1160_1678036995_210911140002.wav
Depl1160_1678036995_210912140002.wav
Depl1160_1678036995_210914120002.wav
D

Let's look at the summary of annotations that were created:

In [174]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202105_1160-112,416,416
Total,416,416


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [175]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 27: UK-UAberdeen-MorayFirth-202107_1173-281

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [176]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-202107_1173-281\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [177]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1173_1678508072_210619100002.wav
Depl1173_1678508072_210603070002.wav


Let's look at the summary of annotations that were created:

In [178]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202107_1173-281,18,18
Total,18,18


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [179]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)

### Dataset 28: UK-UAberdeen-MorayFirth-202107_1190-295

Definition of the path to the folder containing the noise recordings for this deployment, the name of the deployment metadata file, and the parameters of the noise annotations to create.

In [180]:
# Folder with the noise recordings of this deployment
audio_dir = r'D:\NOAA\2022_Minke_whale_detector\manual_annotations\UK-UAberdeen-MorayFirth-202107_1190-295\noise'
# Name of the file containing the deployment metadata
deployment_file = r'deployment_info.csv' 
# Extension of the audio files
file_ext = 'wav'

annot_dur_sec = 60  # duration of the noise annotations in seconds
label_class = 'NN'  # label to use for the noise class
label_subclass = '' # label to use for the noise subclass (if needed, e.g. S for seismic airguns)

In [181]:
# Create the noise annotations of this deployment, using the folder, file and
# annotation settings defined in the previous cell.
annot = create_noise_annot(audio_dir, deployment_file, file_ext, annot_dur_sec, label_class, label_subclass)

Depl1190_6281_210726055604.wav
Depl1190_6281_210714025737.wav
Depl1190_6281_210714055736.wav
Depl1190_6281_210714115734.wav
Depl1190_6281_210715175725.wav
Depl1190_6281_210716055720.wav
Depl1190_6281_210716085719.wav
Depl1190_6281_210716235714.wav
Depl1190_6281_210717055712.wav
Depl1190_6281_210720085646.wav
Depl1190_6281_210720115645.wav
Depl1190_6281_210720205642.wav
Depl1190_6281_210720235641.wav
Depl1190_6281_210721025640.wav
Depl1190_6281_210721055639.wav
Depl1190_6281_210721145636.wav
Depl1190_6281_210721235633.wav
Depl1190_6281_210722115629.wav
Depl1190_6281_210722175627.wav
Depl1190_6281_210722205626.wav
Depl1190_6281_210722235626.wav
Depl1190_6281_210723025625.wav
Depl1190_6281_210723055624.wav
Depl1190_6281_210723085623.wav
Depl1190_6281_210723115622.wav
Depl1190_6281_210723145622.wav
Depl1190_6281_210723175621.wav
Depl1190_6281_210723205620.wav
Depl1190_6281_210723235619.wav
Depl1190_6281_210724025618.wav
Depl1190_6281_210724085617.wav
Depl1190_6281_210724115616.wav
Depl1190

Let's look at the summary of annotations that were created:

In [182]:
# Number of annotations per deployment and label class
annot.summary()

label_class,NN,Total
deployment_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
UK-UAberdeen-MorayFirth-202107_1190-295,7530,7530
Total,7530,7530


The dataset can now be saved as a Raven annotation file and netcdf4 file:

In [183]:
# Read the deployment ID once, before writing anything, and by position (.iloc[0]):
# a plain [0] is resolved against the index of annot.data
# (NOTE(review): to_netcdf may re-index annot.data - confirm with the ecosound version in use).
out_prefix = 'Annotations_dataset_' + annot.data['deployment_ID'].iloc[0]
# NetCDF4 file, saved in audio_dir
annot.to_netcdf(os.path.join(audio_dir, out_prefix + ' annotations.nc'))
# Raven selection table
annot.to_raven(audio_dir, outfile=out_prefix + '.Table.1.selections.txt', single_file=True)