In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

import analysis
import db
import fetcher
from recordings import Recording
from species import CommonName
from trim_recordings import detect_utterances

import IPython
import librosa
import librosa.display
import librosa.feature
import matplotlib
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import numpy as np
import pydub
import scipy.ndimage
from tqdm import tqdm

import collections
import hashlib
import io
import multiprocessing
import warnings

# Suppress Matplotlib's deprecation warnings so they do not clutter the cell outputs.
warnings.filterwarnings('ignore', category=matplotlib.MatplotlibDeprecationWarning)
# With fonttype 'none', SVG output keeps text as text elements instead of converting glyphs to paths.
plt.rcParams['svg.fonttype'] = 'none'

In [None]:
# Database session used for all queries in this notebook.
session = db.create_session('master.db')
# Fetcher used to obtain the audio files via `fetch_cached`.
# NOTE(review): 'recordings' is presumably the cache location and pool_size the number of
# concurrent downloads -- confirm against fetcher.Fetcher.
recordings_fetcher = fetcher.Fetcher('recordings', pool_size=8)

Load recordings from the database and filter them according to the selection criteria:
the right species, both a page URL and an audio URL present, no background species,
quality rating 'A', and a length between 10 and 120 seconds.

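Then order the candidates by the MD5 hash of their recording ID (a reproducible pseudo-random shuffle), keep the first 12, and get their audio from the cache or download it if necessary.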

In [None]:
def md5(string):
    """Return the raw 16-byte MD5 digest of `string`, encoded as UTF-8.

    Used below only as a deterministic sort key, not for anything security related.
    """
    return hashlib.md5(string.encode('utf-8')).digest()

# Look up the target species through its Dutch common name.
target_species = session.query(CommonName).filter(
    CommonName.language_code == 'nl',
    CommonName.common_name == 'Tjiftjaf',
).one().species
# The scientific name is expected to consist of exactly two words: genus and species.
genus, species = target_species.scientific_name.split(' ')

# All recordings of that species; the remaining criteria are checked in Python.
species_query = session.query(Recording).filter(
    Recording.genus == genus,
    Recording.species == species,
)
candidates = [
    recording for recording in species_query
    if recording.url
    and recording.audio_url
    and not recording.background_species
    and recording.quality == 'A'
    and 10 <= recording.length_seconds <= 120
]

# Sorting by the hash of the ID gives an arbitrary but reproducible order.
shuffled = sorted(candidates, key=lambda recording: md5(recording.recording_id))
print(f'Found {len(shuffled)} candidate recordings')

# Work on the first batch of 12; use shuffled[12:24] instead to look at the next batch.
recordings = {recording.recording_id: recording for recording in shuffled[:12]}

def load_recording(recording):
    """Fetch the audio of `recording` and decode it.

    The bytes come from `recordings_fetcher.fetch_cached` and are decoded by
    `analysis.load_sound`.

    Returns:
        A `(recording_id, sound)` tuple, so that a sequence of results can be
        turned directly into a dict keyed by recording ID.
    """
    audio_bytes = recordings_fetcher.fetch_cached(recording.audio_url)
    decoded = analysis.load_sound(io.BytesIO(audio_bytes))
    return recording.recording_id, decoded

# Load and decode the recordings in 8 worker processes. `imap` with a chunk size of 1 yields
# the results in input order as they become available, so tqdm can show progress.
#
# `multiprocessing.Pool` is the documented entry point; `multiprocessing.pool.Pool` only
# resolves if something else happens to have imported the `multiprocessing.pool` submodule.
# The `with` block shuts the workers down even when loading a recording raises; all results
# are consumed by `dict(...)` before the block exits.
with multiprocessing.Pool(8) as pool:
    sounds = dict(tqdm(pool.imap(load_recording, recordings.values(), 1), total=len(recordings)))

Analyze and plot the results, ordered by ascending perceptual noise volume (least noisy recordings first).

In [None]:
# Run the analysis once per loaded sound, keyed by recording ID.
analyses = {
    recording_id: analysis.Analysis(sound)
    for recording_id, sound in sounds.items()
}

# Half the sample rate: used as the upper frequency limit of both spectrogram plots and as the
# height of the vocalization rectangles.
half_sample_rate = analysis.SAMPLE_RATE / 2

# sorted() is ascending, so the recording with the lowest perceptual noise volume comes first.
ordered_ids = sorted(analyses, key=lambda rid: analyses[rid].perceptual_noise_volume_db)
for recording_id in ordered_ids:
    result = analyses[recording_id]

    # One figure per recording: wide spectrogram on the left, narrow noise profile on the right.
    fig, (spec_ax, noise_ax) = plt.subplots(
        1, 2, figsize=(9, 3), gridspec_kw={'width_ratios': [8, 1]}, sharey='all')

    # Mel spectrogram of the recording.
    librosa.display.specshow(
        librosa.power_to_db(result.mel_spectrogram),
        x_axis='time', y_axis='mel', fmin=0.0, fmax=half_sample_rate,
        cmap='magma', vmin=-80, vmax=0,
        hop_length=analysis.FFT_HOP_LENGTH, sr=analysis.SAMPLE_RATE, ax=spec_ax)
    # The title shows the recording ID and type, and links to the recording's page in SVG output.
    recording = recordings[recording_id]
    spec_ax.set_title(f'{recording_id} - {recording.type}')
    spec_ax.title.set_url('https:' + recording.url)

    # Mark each detected vocalization with a translucent green rectangle spanning all frequencies.
    for start, end in result.vocalizations:
        highlight = Rectangle((start, 0), end - start, half_sample_rate,
                              edgecolor='none', facecolor='#00ff0050')
        spec_ax.add_patch(highlight)

    # Overlay the volume of the noise-filtered signal on a second y axis.
    # Each x position is the centre of a spectrogram frame, converted from samples to time.
    volume_ax = spec_ax.twinx()
    frame_count = result.mel_spectrogram.shape[1]
    frame_times = (np.arange(frame_count) + 0.5) * analysis.FFT_HOP_LENGTH / analysis.SAMPLE_RATE
    volume_ax.plot(frame_times, result.filtered_volume_db, linewidth=1.0)
    volume_ax.set_ylim(-80, 0)

    # Horizontal lines at the noise level and at the two vocalization thresholds above it.
    threshold_lines = [
        (result.noise_volume_db, 'red'),
        (result.noise_volume_db + analysis.VOCALIZATION_TRIGGER_THRESHOLD_DB, 'yellow'),
        (result.noise_volume_db + analysis.VOCALIZATION_KEEP_THRESHOLD_DB, 'green'),
    ]
    for level_db, color in threshold_lines:
        volume_ax.axhline(level_db, color=color)

    # Noise profile, drawn with the same colour scale as the spectrogram.
    noise_img = librosa.display.specshow(
        librosa.power_to_db(result.noise_profile),
        y_axis='mel', fmin=0.0, fmax=half_sample_rate, ax=noise_ax,
        cmap='magma', vmin=-80, vmax=0)
    noise_ax.set_xticks([])
    noise_ax.set_ylabel(None)
    noise_ax.set_title(f'Noise: {result.perceptual_noise_volume_db:.1f} dB')
    fig.colorbar(noise_img, ax=noise_ax)

    plt.tight_layout()
    plt.show()

    # Audio player below the figure so the recording can be listened to while inspecting it.
    player = IPython.display.Audio(sounds[recording_id], rate=analysis.SAMPLE_RATE)
    IPython.display.display(player)

In [None]:
# Just a large empty cell to end with, to prevent scrolling when re-evaluating the output of the previous cell.





















































