In [None]:
# Automatically reload imported modules before running each cell, so edits to
# the project's .py files are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Show matplotlib figures inline in the notebook, rendered as SVG.
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

import analysis
import db
import fetcher
from recording_selection import RecordingSelection, load_recording
from recordings import Recording, RecordingOverrides
from species import CommonName
from trim_recordings import detect_utterances

import IPython
import librosa
import librosa.display
import librosa.feature
import matplotlib
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import numpy as np
import pydub
import scipy.ndimage
from tqdm import tqdm

import collections
import hashlib
import io
import itertools
import multiprocessing
import warnings

# Suppress matplotlib's deprecation warnings in cell output.
warnings.filterwarnings('ignore', category=matplotlib.MatplotlibDeprecationWarning)
# Emit text in SVG figures as text elements rather than converting glyphs to paths.
plt.rcParams['svg.fonttype'] = 'none'

In [None]:
# Shared handles used by the cells below.
# NOTE(review): 'master.db' is presumably a database file path relative to the
# working directory — confirm against db.create_session.
session = db.create_session('master.db')
# NOTE(review): 'recordings' and pool_size=8 presumably name the cache location
# and bound the number of parallel downloads — confirm against fetcher.Fetcher.
recordings_fetcher = fetcher.Fetcher('recordings', pool_size=8)
# Passed to RecordingSelection together with the session and the fetcher.
recording_overrides = RecordingOverrides()

Load recordings from the database and filter them according to some selection criteria:
right species, contains the data we need, good quality, not too short and not too long.

Then get them from the cache or download them if necessary.

In [None]:
# Selection parameters: the species' Dutch common name and how many
# candidate recordings to load.
DUTCH_COMMON_NAME = 'Merel'
COUNT = 12

# Resolve the species via its Dutch common name. Assuming a SQLAlchemy-style
# query, .one() raises unless exactly one row matches.
species = (
    session.query(CommonName)
    .filter(CommonName.language_code == 'nl', CommonName.common_name == DUTCH_COMMON_NAME)
    .one()
    .species
)

selection = RecordingSelection(species, session, recordings_fetcher, recording_overrides)
print(f'Found {len(selection.candidate_recordings)} candidate recordings')

# Keep only the first COUNT candidates, keyed by recording id.
recordings = {r.recording_id: r for r in selection.candidate_recordings[:COUNT]}

# Load the audio for every selected recording, showing a progress bar.
sounds = {
    recording_id: load_recording(recording, recordings_fetcher)
    for recording_id, recording in tqdm(recordings.items())
}

Analyze and plot the results, ordered by ascending perceptual noise volume (quietest recordings first).

In [None]:
# Run the analysis on every loaded recording, keyed by recording id.
analyses = {}
for recording_id, sound in sounds.items():
    analyses[recording_id] = analysis.Analysis(sound)


def plot_analysis(recording_id, recording, sound, a):
    """Plot one analyzed recording and display an audio player below the figure.

    The figure has two panels sharing the mel frequency axis:

    * left: the mel spectrogram, with detected vocalizations shaded in
      translucent green and the filtered volume curve plus the vocalization
      threshold lines drawn on a secondary dB axis;
    * right: the noise profile, titled with the perceptual noise volume.

    Args:
        recording_id: Identifier shown in the figure title.
        recording: Object whose ``type`` is shown in the title and whose
            ``url`` becomes the title's hyperlink.
        sound: Audio data passed to ``IPython.display.Audio`` at
            ``analysis.SAMPLE_RATE``.
        a: The ``analysis.Analysis`` result computed for ``sound``.
    """
    # Upper frequency bound used for both spectrogram panels and the overlays.
    nyquist = analysis.SAMPLE_RATE / 2

    fig, (ax_spec, ax_noise) = plt.subplots(
        1, 2, figsize=(9, 3), gridspec_kw={'width_ratios': [8, 1]}, sharey='all')

    # Spectrogram on the left.
    librosa.display.specshow(librosa.power_to_db(a.mel_spectrogram),
                             x_axis='time', y_axis='mel', fmin=0.0, fmax=nyquist,
                             cmap='magma', vmin=-80, vmax=0,
                             hop_length=analysis.FFT_HOP_LENGTH, sr=analysis.SAMPLE_RATE,
                             ax=ax_spec)
    ax_spec.set_title(f'{recording_id} - {recording.type}')
    # NOTE(review): prefixing 'https:' assumes recording.url is protocol-relative
    # (starts with '//') — confirm against the data source.
    ax_spec.title.set_url('https:' + recording.url)

    # Vocalizations as translucent green rectangles spanning the full frequency range.
    for (start, end) in a.vocalizations:
        ax_spec.add_patch(
            Rectangle((start, 0), end - start, nyquist,
                      edgecolor='none', facecolor='#00ff0050'))

    # Volume of the noise-filtered signal on a secondary dB axis. Each frame is
    # placed at the time of its hop midpoint (hence the + 0.5).
    ax_volume = ax_spec.twinx()
    frame_times = ((np.arange(0, a.mel_spectrogram.shape[1]) + 0.5)
                   * analysis.FFT_HOP_LENGTH / analysis.SAMPLE_RATE)
    ax_volume.plot(frame_times, a.filtered_volume_db,
                   linewidth=1.0)
    ax_volume.set_ylim(-80, 0)

    # Reference lines: noise floor (red), trigger threshold (yellow), keep threshold (green).
    ax_volume.axhline(a.noise_volume_db, color='red')
    ax_volume.axhline(a.noise_volume_db + analysis.VOCALIZATION_TRIGGER_THRESHOLD_DB, color='yellow')
    ax_volume.axhline(a.noise_volume_db + analysis.VOCALIZATION_KEEP_THRESHOLD_DB, color='green')

    # Noise profile on the right; its image also drives the colorbar.
    noise_img = librosa.display.specshow(librosa.power_to_db(a.noise_profile),
                                         y_axis='mel', fmin=0.0, fmax=nyquist, ax=ax_noise,
                                         cmap='magma', vmin=-80, vmax=0)
    ax_noise.set_xticks([])
    ax_noise.set_ylabel(None)
    ax_noise.set_title(f'Noise: {a.perceptual_noise_volume_db:.1f} dB')
    fig.colorbar(noise_img, ax=ax_noise)

    fig.tight_layout()
    plt.show()
    # Release the figure explicitly so looping over many recordings does not
    # accumulate open figures (a no-op if the backend already closed it on show()).
    plt.close(fig)

    IPython.display.display(IPython.display.Audio(sound, rate=analysis.SAMPLE_RATE))


# Ascending sort: recordings with the lowest perceptual noise volume are shown first.
for (recording_id, a) in sorted(analyses.items(), key=lambda item: item[1].perceptual_noise_volume_db):
    plot_analysis(recording_id, recordings[recording_id], sounds[recording_id], a)