In [None]:
import librosa
import numpy as np
import torch
import torchaudio
import matplotlib.pyplot as plt
import os

from birdclassification.preprocessing.utils import *
from birdclassification.preprocessing.spectrogram import generate_mel_spectrogram, normalize_spectrogram_for_display
from birdclassification.visualization.plots import *

In [None]:
# Audio clip analysed throughout this notebook (relative path).
input_filename = "../../../tests/sounds/cygnus_olor_2.mp3"

# Analysis settings shared by the cells below.
sr = 32_000            # target sample rate; the clip is resampled to it after loading
n_fft = 512            # passed as n_fft to the spectrogram and loudness helpers
hop_length = 128 * 3   # hop between frames in samples (= 384)
sample_length = 3      # multiplied by sr to size the cut -> presumably seconds

In [None]:
def plot_torch_spectrogram(specgram, title=None, ylabel="freq_bin", ax=None):
    """Draw a power spectrogram on a decibel scale.

    Parameters
    ----------
    specgram : array-like
        Power spectrogram; it is converted with ``librosa.power_to_db``
        before being drawn.
    title : str, optional
        Axes title. No title is set when ``None``.
    ylabel : str
        Label for the vertical axis.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When ``None`` a new single-axes figure is created.
    """
    target_ax = plt.subplots(1, 1)[1] if ax is None else ax

    if title is not None:
        target_ax.set_title(title)
    target_ax.set_ylabel(ylabel)

    spec_db = librosa.power_to_db(specgram)
    # origin="lower" puts row 0 of the array at the bottom of the image.
    target_ax.imshow(
        spec_db,
        origin="lower",
        aspect="auto",
        interpolation="nearest",
    )

In [None]:
y, sr_original = torchaudio.load(input_filename)
y, sr_original

In [None]:
if sr_original != sr:
  y = torchaudio.transforms.Resample(orig_freq=sr_original, new_freq=sr)(y)

y, sr

In [None]:
# Mel spectrogram of the whole clip, computed with the project helper from
# birdclassification.preprocessing.spectrogram using the notebook settings.
spectrogram = generate_mel_spectrogram(y, sr, n_fft, hop_length)
spectrogram

In [None]:
# Show the project-helper spectrogram as an image. Judging by its name,
# normalize_spectrogram_for_display only rescales values for viewing - TODO confirm.
array_to_image(normalize_spectrogram_for_display(spectrogram))

In [None]:
@timer
def create_spectrogram_torch(y):
    """Compute a 64-band mel spectrogram of ``y`` with torchaudio.

    Reads the notebook-level ``sr``, ``n_fft`` and ``hop_length`` settings and
    limits the mel filterbank to the 150 Hz - 15000 Hz range. Only index 0
    along the first dimension of the transform output is returned
    (presumably the first channel - confirm against the loaded waveform).
    """
    mel_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=n_fft,
        hop_length=hop_length,
        f_min=150,
        f_max=15000,
        n_mels=64,
    )
    mel_spec = mel_transform(y)
    return mel_spec[0]

# Run the torchaudio implementation on the full clip (wrapped by @timer).
spectrogram_torch = create_spectrogram_torch(y)

# Draw it on a dB scale with row 0 at the bottom of the image.
fig, ax = plt.subplots(figsize=(12, 6))
ax.imshow(librosa.power_to_db(spectrogram_torch), origin="lower", aspect="auto")

In [None]:
# Loudness curve of the clip from the project helper, plotted on a dB scale.
loudness = convert_waveform_to_loudness(y, n_fft, hop_length)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(librosa.power_to_db(loudness), color="blue")
ax.grid()

In [None]:
# Position of the loudest point as reported by the project helper; it is used
# below as the anchor for cutting the clip.
peak = get_loudest_index(y, n_fft, hop_length)
peak

In [None]:
# Express the peak in hop units so it can be compared with loudness.argmax()
# in the next cell - assumes `peak` is a sample index (TODO confirm).
peak / hop_length

In [None]:
# Index of the maximum of the loudness curve, shown for comparison with
# peak / hop_length from the previous cell.
loudness.argmax()

In [None]:
# Cut the clip around the peak with the project helper. The third argument is
# sr * sample_length, i.e. sample_length seconds' worth of samples at rate sr
# (presumably the length of the cut - see cut_around_index).
y_cut = cut_around_index(y, peak, sr * sample_length)
y_cut

In [None]:
# Mel spectrogram of the cut only, rendered the same way as the full-clip one.
spectrogram_cut = generate_mel_spectrogram(y_cut, sr, n_fft, hop_length)
array_to_image(normalize_spectrogram_for_display(spectrogram_cut))

In [None]:
# Locate `y_cut` inside `y` (both indexed as [channel, sample]) so the cut can
# be highlighted on the plots below.
#
# Comparing the full window at every sample offset costs
# O(len(y) * len(y_cut)) and is impractically slow on real recordings.
# Instead, one vectorised pass keeps only the offsets whose first sample
# matches the first sample of `y_cut` on every channel; the full-window
# comparison then runs on those few candidates only. The first match wins.
start_index = None
end_index = None

window = y_cut.size(1)
n_offsets = y.size(1) - window + 1  # number of valid start positions

if window > 0 and n_offsets > 0:
    first_sample_matches = (y[:, :n_offsets] == y_cut[:, :1]).all(dim=0)
    for i in torch.nonzero(first_sample_matches).flatten().tolist():
        if torch.all(y[:, i:i + window] == y_cut):
            start_index = i
            end_index = i + window - 1  # inclusive index of the last sample
            break

# Fail here with a clear message rather than with a TypeError on
# `None / hop_length` in the plotting cells that follow.
assert start_index is not None, (
    "y_cut was not found as a contiguous slice of y; "
    "the highlight plots below need its position"
)

start_index, end_index

In [None]:
# Loudness curve with the located cut shaded in red. The sample indices are
# divided by hop_length to place them on the curve's x axis.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(librosa.power_to_db(loudness), color="blue")
ax.axvspan(start_index / hop_length, end_index / hop_length, color="red", alpha=0.5)
ax.grid()

In [None]:
# torchaudio spectrogram (dB scale) with the located cut shaded in red. The
# sample indices are divided by hop_length to place them on the x axis.
fig, ax = plt.subplots(figsize=(12, 6))
ax.imshow(librosa.power_to_db(spectrogram_torch), origin="lower", aspect="auto")
ax.axvspan(start_index / hop_length, end_index / hop_length, color="red", alpha=0.25)