In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import analysis
import recording_selection
import db
import fetcher
from recordings import Recording, RecordingOverrides
from species import CommonName
from trim_recordings import detect_utterances

import IPython
import librosa
import librosa.display
import librosa.feature
import matplotlib
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import numpy as np
import pydub
import scipy.ndimage
import scipy.signal
from tqdm import tqdm

import collections
import hashlib
import io
import itertools
import logging
import multiprocessing
import warnings

# Hide matplotlib's deprecation warnings so they do not clutter the cell output.
warnings.filterwarnings('ignore', category=matplotlib.MatplotlibDeprecationWarning)
# With 'none', SVG output keeps text as text instead of converting glyphs to
# paths (see matplotlib's `svg.fonttype` rcParam).
plt.rcParams['svg.fonttype'] = 'none'
# Uncomment to get debug-level output from the root logger.
# logging.getLogger().setLevel(level=logging.DEBUG)

In [None]:
# Database session used for all queries below.
# NOTE(review): 'master.db' looks like a path relative to the working directory — confirm.
session = db.create_session('master.db')
# Fetcher that supplies the recording files.
# NOTE(review): presumably 'recordings' is the local cache directory and
# pool_size the number of parallel downloads — verify against fetcher.Fetcher.
recordings_fetcher = fetcher.Fetcher('recordings', pool_size=8)
# Passed to RecordingSelection together with the session and the fetcher.
recording_overrides = RecordingOverrides()

Load recordings from the database and keep only those that meet the selection criteria:
they are of the right species, contain the data we need, are of good quality, and are neither too short nor too long.

Then get them from the cache or download them if necessary.

In [None]:
# Species to analyse, identified by its Dutch common name, and the number of
# recordings to load for it.
DUTCH_COMMON_NAME = 'Merel'
COUNT = 12

# Find the common-name row for the Dutch ('nl') name and follow it to its species.
# NOTE(review): `.one()` presumably raises unless exactly one row matches — confirm
# against the session's query API.
matching_names = session.query(CommonName).filter(
    CommonName.language_code == 'nl',
    CommonName.common_name == DUTCH_COMMON_NAME,
)
species = matching_names.one().species
print(species.scientific_name)

In [None]:
# Build the selection for the chosen species and keep only the first COUNT
# items that `suitable_recordings()` yields.
selection = recording_selection.RecordingSelection(species, session, recordings_fetcher, recording_overrides)
print(f'Found {len(selection.candidate_recordings)} candidate recordings; loading {COUNT} suitable ones...')

suitable = selection.suitable_recordings()
analyses = list(itertools.islice(suitable, COUNT))

# Upper end of the displayed frequency axis: half the sample rate.
max_frequency = analysis.SAMPLE_RATE / 2
# Display settings shared by every per-recording spectrogram.
spectrogram_options = dict(
    x_axis='time', y_axis='mel', fmin=0.0, fmax=max_frequency,
    cmap='magma', vmin=-80, vmax=0,
    hop_length=analysis.FFT_HOP_LENGTH, sr=analysis.SAMPLE_RATE,
)

for a in analyses:
    # One wide, short figure per recording: its mel spectrogram in dB.
    fig, ax = plt.subplots(figsize=(9, 2))
    librosa.display.specshow(librosa.power_to_db(a.mel_spectrogram), ax=ax, **spectrogram_options)
    ax.set_title(f'{a.recording.recording_id} - {a.recording.type}')
    # Attach a link to the title.
    # NOTE(review): the stored URL is presumably protocol-relative, hence the 'https:' prefix.
    ax.title.set_url('https:' + a.recording.url)
    # Shade each vocalization's time span over the full frequency range.
    for voc in a.vocalizations:
        highlight = Rectangle((voc.start, 0), voc.end - voc.start, max_frequency,
                              edgecolor='none', facecolor='#00ff0050')
        ax.add_patch(highlight)
    plt.show()

    # Audio player for the same recording, right below its spectrogram.
    IPython.display.display(IPython.display.Audio(a.sound, rate=analysis.SAMPLE_RATE))

In [None]:
# Flatten the per-recording vocalization lists into one list, keeping the
# order of the recordings and of the vocalizations within each recording.
vocalizations = [voc for a in analyses for voc in a.vocalizations]
print(f'Found {len(vocalizations)} vocalizations')

In [None]:
def show_spectrograms(spectrograms, ax=None, max_width=3000, **kwargs):
    '''
    Tiles 2D arrays onto a single axes, left to right, starting a new row below
    whenever the next tile would extend past `max_width`.

    Every tile is drawn with `imshow` (magma colormap, origin at the bottom) at
    one data unit per array element and outlined in lime. Tiles on a row are
    separated by a one-unit gap. The axes frame is hidden and the limits are set
    to enclose exactly the rows that contain tiles.

    Args:
        spectrograms: iterable (a generator is fine) of 2D arrays shaped
            `(rows, columns)`. May be empty, in which case nothing is drawn.
        ax: axes to draw on. Defaults to the current pyplot axes.
        max_width: row width in data units before wrapping. A tile wider than
            this still gets a row of its own.
        **kwargs: forwarded unchanged to every `ax.imshow` call (e.g. `vmin`).

    Returns:
        None.
    '''
    if ax is None:
        ax = plt.gca()
    x = 0
    y = 0  # Top edge of the current row; rows grow downwards (negative y).
    row_height = 0  # Tallest tile placed on the current row so far.
    bottom = 0  # Lowest edge of any tile drawn so far.
    for spectrogram in spectrograms:
        h, w = spectrogram.shape
        if x + w > max_width and x > 0:
            # Wrap by the height of the row just finished, so tiles of
            # different heights never overlap the row above.
            x = 0
            y -= row_height
            row_height = 0
        ax.imshow(spectrogram, cmap='magma', origin='lower', aspect='auto', extent=(x, x + w, y - h, y), **kwargs)
        ax.add_patch(Rectangle((x, y - h), w, h, fill=False, color='lime'))
        x += w + 1
        row_height = max(row_height, h)
        bottom = min(bottom, y - h)
    ax.axis('off')
    ax.set_xlim(0, max_width)
    # Leave the y-limits alone when nothing was drawn; (0, 0) would be degenerate.
    if bottom < 0:
        ax.set_ylim(bottom, 0)

In [None]:
# Close-up of one vocalization, one panel per representation, each with its own colorbar:
# filtered mel spectrogram in dB, MFCCs, and the feature vectors (transposed so time runs along x).
voc = vocalizations[11]
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(25, 6))
panels = [
    (librosa.power_to_db(voc.filtered_mel_spectrogram), dict(y_axis='mel')),
    (voc.mfccs, dict(cmap='magma', vmin=-100, vmax=100)),
    (np.array(voc.features).T, dict(cmap='magma')),
]
for panel_ax, (data, options) in zip(ax, panels):
    img = librosa.display.specshow(data, x_axis='time', ax=panel_ax, **options)
    fig.colorbar(img, ax=panel_ax)

In [None]:
# Overview: the filtered mel spectrogram (in dB) of every vocalization, tiled on one figure.
fig, ax = plt.subplots(figsize=(25, 10))
mel_db_tiles = (librosa.power_to_db(voc.filtered_mel_spectrogram) for voc in vocalizations)
show_spectrograms(mel_db_tiles, ax=ax)

In [None]:
# Overview: the MFCCs of every vocalization, tiled on one figure.
fig, ax = plt.subplots(figsize=(25, 10))
mfcc_tiles = (voc.mfccs for voc in vocalizations)
# vmin=0 is forwarded to each imshow call, pinning the lower end of the colour scale at 0.
show_spectrograms(mfcc_tiles, ax=ax, vmin=0)

In [None]:
# Overview: the feature vectors of every vocalization, transposed so that each
# feature vector becomes one column of its tile.
fig, ax = plt.subplots(figsize=(25, 10))
feature_tiles = (np.array(voc.features).T for voc in vocalizations)
show_spectrograms(feature_tiles, ax=ax)

In [None]:
# Pool the features of all vocalizations into one sorted list, so that equal
# features end up next to each other.
features = sorted(f for voc in vocalizations for f in voc.features)
print(f'Total features:  {len(features)}')
print(f'Unique features: {len(set(features))}')
fig, ax = plt.subplots(figsize=(25, 10))
# One column per feature after the transpose; cut along the columns into 12
# tiles of near-equal width for display.
feature_chunks = np.array_split(np.array(features).T, 12, axis=1)
show_spectrograms(feature_chunks, ax=ax)

In [None]:
# Abandoned: attempt at similarity metric by sliding one spectrogram across the other.

def similarity(self, other):
    '''
    Scores how well this vocalization matches another one.

    Both `filtered_mel_spectrogram` arrays are reduced to two levels (1.0 at or
    above the threshold, -0.1 below it). One is then slid across the other, and
    the highest correlation found is divided by the smaller of the two arrays'
    sums of squares. The inputs themselves are not modified.
    '''
    threshold = 0.001

    def two_level(spectrogram):
        # Both masks are taken from the untouched input, so the second
        # assignment cannot pick up values written by the first.
        quiet = spectrogram < threshold
        loud = spectrogram >= threshold
        levels = spectrogram.copy()
        levels[quiet] = -1.0e-1
        levels[loud] = 1.0
        return levels

    a = two_level(self.filtered_mel_spectrogram)
    b = two_level(other.filtered_mel_spectrogram)
    # Cross-correlation written as a convolution: `convolve` reverses its second
    # argument, so it is reversed up front to cancel that out. (`np.correlate`
    # would not reverse, but it only accepts 1D input.)
    correlation = scipy.signal.convolve(a, np.flip(b), mode='valid')
    smallest_energy = min(np.sum(np.square(a)), np.sum(np.square(b)))
    return correlation.max() / smallest_energy

n = 12 # len(vocalizations)
vocalization_indices = np.arange(n)
similarity_matrix = np.zeros(shape=(n, n))
# Only pairs with i <= j are scored; each score is written to both [i, j] and
# [j, i], which fills the whole matrix.
index_pairs = list(itertools.combinations_with_replacement(range(n), 2))
for i, j in tqdm(index_pairs):
    s = similarity(vocalizations[i], vocalizations[j])
    similarity_matrix[i, j] = similarity_matrix[j, i] = s
print(similarity_matrix)