In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import analysis
import recording_selection
import db
import fetcher
from recordings import Recording, RecordingOverrides
from species import CommonName
from trim_recordings import detect_utterances

import IPython
import librosa
import librosa.display
import librosa.feature
import matplotlib
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import numpy as np
import pydub
import scipy.ndimage
import scipy.signal
from tqdm import tqdm

import collections
import hashlib
import io
import itertools
import logging
import multiprocessing
import warnings

# Ignore matplotlib's deprecation warnings for the remainder of the notebook.
warnings.filterwarnings('ignore', category=matplotlib.MatplotlibDeprecationWarning)
# Uncomment the next line to switch the root logger to DEBUG level.
# logging.getLogger().setLevel(level=logging.DEBUG)

In [None]:
# Session created from 'master.db'; used for the species query further down.
session = db.create_session('master.db')
# Fetcher handed to RecordingSelection below. Presumably 'recordings' is the local
# cache directory and pool_size the number of parallel workers — TODO confirm
# against fetcher.Fetcher.
recordings_fetcher = fetcher.Fetcher('recordings', pool_size=8)
# Overrides handed to RecordingSelection below; see recordings.RecordingOverrides.
recording_overrides = RecordingOverrides()

Load a set of good-quality recordings to experiment with.

In [None]:
# The species to explore (looked up by its Dutch common name) and the number of
# suitable recordings to load for it.
DUTCH_COMMON_NAME = 'Merel'
COUNT = 12

# Resolve the species through the matching Dutch common-name row.
common_name_query = session.query(CommonName).filter(
    CommonName.language_code == 'nl',
    CommonName.common_name == DUTCH_COMMON_NAME,
)
species = common_name_query.one().species

selection = recording_selection.RecordingSelection(
    species, session, recordings_fetcher, recording_overrides)
print(f'Found {len(selection.candidate_recordings)} candidate recordings for {species.scientific_name} ({species.common_name("nl")}); loading the first {COUNT}...')

# Take at most COUNT items from the suitable recordings, showing progress while they load.
# The loop further down unpacks each item as a (recording, analysis) pair.
first_suitable = itertools.islice(selection.suitable_recordings(), COUNT)
recordings_analyses = list(tqdm(first_suitable, total=COUNT))

Tally the recording types among the candidate recordings, then display the results of vocalization detection for each loaded recording.

In [None]:
# Tally how often each recording type occurs across all candidate recordings.
# A recording may carry several types; each one is counted.
counts = collections.Counter(
    recording_type
    for recording in selection.candidate_recordings
    for recording_type in recording.types
)
# most_common() yields (type, count) pairs from most to least frequent, keeping
# first-seen order among ties.
for recording_type, count in counts.most_common():
    print(f'{count:3}  {recording_type}')

In [None]:
# Keyword arguments shared by both spectrogram panels of every figure below.
specshow_args = {
    'x_axis': 'time',
    'y_axis': 'log',
    'fmin': 0,
    'fmax': analysis.SAMPLE_RATE / 2,
    'cmap': 'magma',
    'vmin': -80,
    'vmax': 0,
    'hop_length': analysis.FFT_HOP_LENGTH,
    'sr': analysis.SAMPLE_RATE,
}

for r, a in recordings_analyses:
    # Build a fresh Analysis from the raw sound: values cached on the loaded object
    # would otherwise hide the effect of edits to the analysis code.
    a = analysis.Analysis(a.sound)

    # One shared reference amplitude keeps the dB scale identical in both panels.
    ref = np.amax(a.filtered_spectrogram)
    filtered_db = librosa.amplitude_to_db(a.filtered_spectrogram, ref=ref)
    noise_db = librosa.amplitude_to_db(a.noise_profile, ref=ref)

    # Wide panel for the spectrogram, narrow panel for the noise profile.
    fig, (spectrogram_ax, noise_ax) = plt.subplots(
        1, 2, figsize=(12, 3), gridspec_kw={'width_ratios': [8, 1]}, sharey='all')

    # Left panel: the filtered spectrogram, titled with the recording id and type.
    librosa.display.specshow(filtered_db, ax=spectrogram_ax, **specshow_args)
    spectrogram_ax.set_title(f'{r.recording_id} - {r.type}')
    spectrogram_ax.title.set_url('https:' + r.url)

    # Overlay the volume curves on a secondary y axis spanning -80..0.
    volume_ax = spectrogram_ax.twinx()
    frame_times = analysis.frames_to_time(np.arange(0, a.spectrogram.shape[1]))
    volume_ax.plot(frame_times, a.volume_db, color='red', linewidth=1.0)
    volume_ax.plot(frame_times, a.perceptual_filtered_volume_db, color='lime', linewidth=1.0)
    volume_ax.set_ylim(-80, 0)

    # Shade every detected vocalization with a translucent green rectangle
    # covering the full frequency range.
    for vocalization in a.vocalizations:
        highlight = Rectangle(
            (vocalization.start, 0), vocalization.duration, analysis.SAMPLE_RATE / 2,
            edgecolor='none', facecolor='#00ff0040')
        spectrogram_ax.add_patch(highlight)

    # Horizontal markers for the trigger and keep thresholds of the detector.
    volume_ax.axhline(analysis.VOCALIZATION_TRIGGER_THRESHOLD_DB, color='yellow')
    volume_ax.axhline(analysis.VOCALIZATION_KEEP_THRESHOLD_DB, color='orange')

    # Right panel: the noise profile, stripped of ticks and labels, with the colorbar.
    img = librosa.display.specshow(noise_db, ax=noise_ax, **specshow_args)
    noise_ax.set_xticks([])
    noise_ax.set_xlabel(None)
    noise_ax.set_ylabel(None)
    fig.colorbar(img, ax=noise_ax)

    plt.tight_layout()
    plt.show()

    # Audio player for the same sound, shown directly under its figure.
    IPython.display.display(IPython.display.Audio(a.sound, rate=analysis.SAMPLE_RATE))

In [None]:
def show_spectrograms(spectrograms, ax=None, **kwargs):
    """Tile 2-D spectrograms left to right on one axes, wrapping into rows.

    Each spectrogram is drawn with ``imshow`` at its native size (one data unit
    per array element) and outlined in lime. A new row is started whenever the
    next spectrogram would extend past ``max_width`` (3000) data units; a
    spectrogram that is wider than that on its own is still placed at the start
    of a row and clipped by the x limits.

    Args:
        spectrograms: Iterable of 2-D arrays (anything with a two-element
            ``.shape`` that ``imshow`` accepts).
        ax: Matplotlib axes to draw on. Defaults to the current axes.
        **kwargs: Extra keyword arguments forwarded to ``ax.imshow``
            (e.g. ``vmin``/``vmax``).
    """
    if ax is None:
        # The original default of None crashed on the first ax.imshow call.
        ax = plt.gca()

    max_width = 3000
    x = 0           # Left edge of the next spectrogram.
    row_top = 0     # Top edge of the current row (rows grow downwards from 0).
    row_height = 0  # Tallest spectrogram placed in the current row so far.
    bottom = 0      # Lowest edge drawn so far; becomes the lower y limit.

    for spectrogram in spectrograms:
        h, w = spectrogram.shape
        # Wrap before placing if this one does not fit in the current row.
        # Never wrap at x == 0, or an overly wide spectrogram would wrap forever.
        if x > 0 and x + w > max_width:
            x = 0
            row_top -= row_height
            row_height = 0
        ax.imshow(spectrogram, cmap='magma', origin='lower', aspect='auto',
                  extent=(x, x + w, row_top - h, row_top), **kwargs)
        ax.add_patch(Rectangle((x, row_top - h), w, h, fill=False, color='lime'))
        row_height = max(row_height, h)
        bottom = min(bottom, row_top - h)
        x += w + 1  # Leave a one-unit gap between neighbours.

    ax.axis('off')
    ax.set_xlim(0, max_width)
    # Limit y to what was actually drawn: no empty trailing row when the last
    # row happens to be full, and nothing to set when there was no input.
    if bottom < 0:
        ax.set_ylim(bottom, 0)