In [1]:
import math
import glob

import matplotlib.pyplot as plt
import librosa 
import torchaudio
import librosa.display
import pandas as pd
import soundfile as sf

from fastai_audio.audio import * 

from fastai.vision import *

In [2]:
# Fix the torch RNG seed so repeated runs are reproducible
torch.manual_seed(0)

from fastai_audio.audio import SpectrogramConfig, AudioConfig

path = Path("output/")

# Spectrogram parameters handed to AudioConfig below
sg_cfg = SpectrogramConfig(
    f_min=200.0, f_max=1000.0,
    hop_length=32, n_fft=128, n_mels=64,
    pad=0, win_length=None,
)

# Work on spectrograms (the `delta=True` option is currently disabled)
config = AudioConfig(use_spectro=True, sg_cfg=sg_cfg)

# Read the clips under `path`, split off a random 20% (split seed 4),
# and take each clip's label from its folder
al = (
    AudioList.from_folder(path, config=config)
    .split_by_rand_pct(.2, seed=4)
    .label_from_folder()
)

In [3]:
# Upscale the spectrograms and switch off both masking augmentations.
# NOTE(review): an earlier version of this call spelled the keyword
# `mask_freq`; confirm that `mask_frequency` is the name
# get_spectro_transforms actually accepts, otherwise frequency masking may
# still be applied.
tfms = get_spectro_transforms(
    size=(128, 626),  # Upscale the spectrograms from 64x313
    mask_frequency=False,  # Don't mask frequencies
    mask_time=False,  # Don't mask time
)
db = al.transform(tfms).databunch(bs=10)

In [None]:
from fastai.vision import models

from fastai_audio.audio import *

# Build a ResNet-50 audio learner on the databunch and restore the saved weights
learn = audio_learner(db, base_arch=models.resnet50).load("weight_decay_more_data_465")
# Put the underlying model into eval mode before running predictions
learn.model.eval()

In [6]:
# Sanity check that our model can identify samples we trained it on
tmpfile = "output/damselfish/20.wav"
item = AudioItem(path=tmpfile)
(category, _, _) = audio_predict(learn, item)
# Fail loudly on a wrong prediction instead of relying on someone reading the
# cell output; the comparison mirrors the one used when scanning recordings.
assert str(category) == "damselfish", f"expected damselfish, got {category}"
category

Category damselfish

In [7]:
from subprocess import call, check_output

def fetch_recording(gs_path, dest_dir="recordings/"):
    """Download one recording with ``gsutil cp``.

    Args:
        gs_path: ``gs://`` URL of the object to copy.
        dest_dir: Local destination handed to ``gsutil cp``.

    Raises:
        RuntimeError: If ``gsutil`` exits with a non-zero status, so callers
            never go on to read a file that was not downloaded.
    """
    return_code = call(["gsutil", "cp", gs_path, dest_dir])
    if return_code != 0:
        raise RuntimeError(
            f"gsutil cp {gs_path} {dest_dir} failed with exit code {return_code}"
        )

def list_all_recordings(pattern="gs://sonumator/recordings/2015-2016/*.wav"):
    """List the objects matching ``pattern`` via ``gsutil ls``.

    Args:
        pattern: ``gs://`` URL or glob to list; defaults to the 2015-2016
            recordings.

    Returns:
        The lines of the ``gsutil ls`` output, decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If ``gsutil`` exits non-zero
            (raised by ``check_output``).
    """
    listing = check_output(["gsutil", "ls", pattern])
    return listing.decode("utf-8").splitlines()

In [8]:
import torchaudio
import pandas as pd
import soundfile as sf
import os

def search_file_for_samples(gs_filepath, model, offset=2, sample_rate=5000):
    """Scan one recording for windows the model classifies as damselfish.

    The recording is downloaded with ``fetch_recording`` and cut into
    consecutive ``offset``-second windows. Each window is resampled, written
    to a temporary WAV under ``potentials/`` and classified with
    ``audio_predict``. The temporary clips and the downloaded recording are
    removed before returning, including when classification raises.

    Args:
        gs_filepath: ``gs://`` URL of the recording to scan.
        model: Learner handed to ``audio_predict`` for every window.
        offset: Window length and step, in seconds.
        sample_rate: Rate each window is resampled to before prediction.

    Returns:
        DataFrame with ``start``, ``end`` and ``filepath`` columns, one row
        per window predicted as "damselfish". ``filepath`` is the local
        download path, which no longer exists once this function returns.
    """
    fetch_recording(gs_filepath)
    filepath = f"recordings/{os.path.basename(gs_filepath)}"
    basename = os.path.splitext(os.path.basename(filepath))[0]
    os.makedirs("potentials", exist_ok=True)
    matches = []

    try:
        # NOTE(review): the two-value return looks like the older
        # torchaudio.info API — confirm against the installed version.
        si, _ = torchaudio.info(filepath)
        length = si.length / si.rate

        for start in range(0, int(length), offset):
            # Load exactly the window that is reported as [start, start + offset)
            y, _ = librosa.load(
                filepath,
                sr=sample_rate,
                offset=start,
                duration=offset,
            )
            tmpfile = f"potentials/{basename}-{start}.wav"
            try:
                sf.write(tmpfile, y, sample_rate)
                # Use the model that was passed in, not a notebook-level global
                category, _, _ = audio_predict(model, AudioItem(path=tmpfile))
            finally:
                if os.path.exists(tmpfile):
                    os.remove(tmpfile)

            if str(category) == "damselfish":
                matches.append(
                    {"start": start, "end": start + offset, "filepath": filepath}
                )
    finally:
        if os.path.exists(filepath):
            os.remove(filepath)

    # Build the frame once; appending row by row is quadratic
    return pd.DataFrame(matches, columns=["start", "end", "filepath"])

In [9]:
# Fetch the `gsutil ls` listing of recordings once; a later cell slices this list.
all_files = list_all_recordings()

In [10]:
import concurrent.futures

def search_all_files(file_list, model=None, max_workers=10):
    """Run ``search_file_for_samples`` over many recordings in a thread pool.

    Args:
        file_list: Iterable of ``gs://`` recording URLs.
        model: Learner forwarded to ``search_file_for_samples``; defaults to
            the notebook-level ``learn``.
        max_workers: Number of worker threads.

    Returns:
        A single DataFrame holding the rows from every recording whose search
        succeeded, re-indexed from 0. Recordings whose search raised are
        reported on stdout and skipped. If nothing succeeded, an empty frame
        with ``start``, ``end`` and ``filepath`` columns is returned.
    """
    if model is None:
        model = learn

    completed_dataframes = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_path = {
            executor.submit(search_file_for_samples, gs_filepath, model): gs_filepath
            for gs_filepath in file_list
        }
        for future in concurrent.futures.as_completed(future_to_path):
            gs_filepath = future_to_path[future]
            try:
                found = future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (gs_filepath, exc))
            else:
                completed_dataframes.append(found)

    if not completed_dataframes:
        # pd.concat raises ValueError when given an empty list
        return pd.DataFrame(columns=["start", "end", "filepath"])

    # ignore_index gives the combined frame a unique 0..n-1 index
    return pd.concat(completed_dataframes, ignore_index=True)

In [None]:
import time
# Time a trial run over the first four recordings.
# perf_counter is a monotonic clock, so the elapsed time cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()
df = search_all_files(all_files[0:4])
print("--- %s seconds ---" % (time.perf_counter() - start_time))