In [None]:
import numpy as np
from pathlib import Path
import librosa
from collections import defaultdict
import soundfile as sf
import tqdm
import os
from multiprocessing import Pool, cpu_count
import cv2

%matplotlib inline

In [None]:
# Root of the input audio; the conversion cell walks it one subdirectory at a time.
INPUT =  "../input/train_resampled"
# Root for generated files; outputs land in OUTPUT/<input subdirectory name>/.
OUTPUT = "../output/train_jpg"
# Sample rate (Hz) handed to librosa when computing mel spectrograms.
# NOTE(review): files are read without resampling, so they are presumably
# already at this rate — confirm.
SAMPLE_RATE = 32_000
# CPU count reported by multiprocessing; the conversion cell derives its
# worker-pool size from this value.
NUM_WORKERS = cpu_count()

print(NUM_WORKERS)

In [None]:
def audio_to_spec(audio):
    """Convert a waveform into a log-scaled (dB) mel spectrogram.

    Args:
        audio: Waveform samples, forwarded to librosa as ``y`` together with
            ``sr=SAMPLE_RATE``.

    Returns:
        ``np.float32`` array holding the dB-scaled mel spectrogram computed
        with 128 mel bands between 20 Hz and 16 kHz.
    """
    # Pass the waveform by keyword: librosa >= 0.10 rejects positional audio
    # arguments, while `y=` is accepted by older releases as well.
    mel_power = librosa.feature.melspectrogram(
        y=audio, sr=SAMPLE_RATE, fmin=20, fmax=16000, n_mels=128
    )
    return librosa.power_to_db(mel_power).astype(np.float32)

def audio2vec(path):
    """Compute the mel spectrogram of one audio file and store it with ``np.save``.

    Args:
        path: ``pathlib.Path``-like object for the audio file. Its parent
            directory name and file name are reused to build the output
            location under ``OUTPUT``.
    """
    samples, _sample_rate = sf.read(path)
    spectrogram = audio_to_spec(samples)
    # NOTE(review): np.save appends ".npy" when the name does not already end
    # with it, so the file on disk is "<name>.npz.npy" — confirm this is intended.
    target = f"{OUTPUT}/{path.parent.name}/{path.name}.npz"
    np.save(target, spectrogram)
    
def mono_to_color(X, mean=None, std=None, norm_max=None, norm_min=None, eps=1e-6):
    """Turn a single-channel array into a 3-channel ``uint8`` image.

    The input is replicated along a new trailing axis, standardized, clipped
    to ``[norm_min, norm_max]`` and rescaled to the 0-255 range.

    Args:
        X: Numeric array (e.g. a spectrogram).
        mean: Value subtracted before scaling; ``None`` uses the mean of ``X``.
        std: Divisor used for standardization; ``None`` uses the standard
            deviation of the centered data.
        norm_max: Upper clipping bound in standardized units; ``None`` uses
            the observed maximum.
        norm_min: Lower clipping bound in standardized units; ``None`` uses
            the observed minimum.
        eps: Added to ``std`` to avoid division by zero; also the threshold
            below which the standardized data is treated as flat.

    Returns:
        ``uint8`` array of shape ``X.shape + (3,)``. All zeros when the
        standardized data is flat or the clipping range is empty.
    """
    # Stack X as [X, X, X] so the result can be written as a colour image.
    X = np.stack([X, X, X], axis=-1)

    # Standardize. Defaults are detected with `is None` rather than truthiness
    # so that an explicit 0 (e.g. mean=0.0 or norm_min=0) is honoured.
    if mean is None:
        mean = X.mean()
    X = X - mean
    if std is None:
        std = X.std()
    Xstd = X / (std + eps)

    _min, _max = Xstd.min(), Xstd.max()
    if norm_max is None:
        norm_max = _max
    if norm_min is None:
        norm_min = _min

    # The second condition prevents a division by zero when the caller
    # supplies an empty or inverted clipping range.
    if (_max - _min) > eps and norm_max > norm_min:
        # Normalize to [0, 255]
        V = np.clip(Xstd, norm_min, norm_max)
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        return V.astype(np.uint8)

    # Flat input or degenerate range: just zero.
    return np.zeros_like(Xstd, dtype=np.uint8)

def audio2pict(path):
    """Render the mel spectrogram of one audio file as a JPEG under ``OUTPUT``.

    Args:
        path: ``pathlib.Path``-like object for the audio file. Its parent
            directory name and file name are reused to build the output
            location.
    """
    samples, _sample_rate = sf.read(path)
    image = mono_to_color(audio_to_spec(samples))
    # The status returned by cv2.imwrite is not inspected here.
    target = f"{OUTPUT}/{path.parent.name}/{path.name}.jpg"
    cv2.imwrite(target, image)

In [None]:
# Convert every recording under INPUT into a spectrogram JPEG under OUTPUT,
# mirroring the input's subdirectory layout.
recs = defaultdict(list)  # NOTE(review): not referenced by any visible cell — confirm before removing

# One pool is shared by all directories instead of being rebuilt per directory.
# max(1, ...) keeps the pool valid on single-core machines, where
# NUM_WORKERS // 2 would be 0 and Pool would raise ValueError.
with Pool(max(1, NUM_WORKERS // 2)) as p:
    # Path.iterdir() is a generator, so tqdm needs an explicit total.
    for directory in tqdm.tqdm_notebook(Path(INPUT).iterdir(), total=len(os.listdir(INPUT))):
        # Skip stray files such as .DS_Store; only subdirectories hold recordings.
        if not directory.is_dir():
            continue
        # Portable replacement for the `mkdir -p` shell escape.
        os.makedirs(f"{OUTPUT}/{directory.name}", exist_ok=True)
        file_paths = [f for f in directory.iterdir() if f.name != ".DS_Store"]
        # Swap in audio2vec to store raw arrays instead of images:
        #p.map(audio2vec, file_paths)
        p.map(audio2pict, file_paths)

In [None]:
# Sanity check: print the path of every zero-byte file found under OUTPUT.
for directory in tqdm.tqdm_notebook(Path(OUTPUT).iterdir(), total=len(os.listdir(OUTPUT))):
    if directory.name != ".DS_Store":
        for path in directory.iterdir():
            # Ignore .DS_Store entries; report files that contain no bytes.
            if path.name != ".DS_Store" and os.path.getsize(path) < 1:
                print(path)

In [None]:
# Hand-picked recordings for ad-hoc inspection; only paths[0] is used by the
# cell that follows.
# NOTE(review): presumably files that needed a closer look after the
# conversion run — confirm why these five were chosen.
paths = [
    f"{INPUT}/comrav/XC246425.wav",
    f"{INPUT}/prawar/XC479026.wav",
    f"{INPUT}/snobun/XC487557.wav",
    f"{INPUT}/snobun/XC487556.wav",
    f"{INPUT}/stejay/XC503349.wav"
]

In [None]:
# Smoke test: run the full audio -> spectrogram -> image pipeline on the first
# hand-picked recording and write the result to the working directory.
x, _ = sf.read(paths[0])  # the sample rate returned by soundfile is not used
x_spex = audio_to_spec(x)

# Show the spectrogram dimensions before it is converted to an image.
print(x_spex.shape)
cv2.imwrite("tmp.jpg", mono_to_color(x_spex))