This notebook compares how long librosa's load function and numpy's load function take to load the data set.
The faster of the two is chosen for the DataGenerator.

In [4]:
import os
from os.path import join
from typing import List

import librosa
import numpy as np

from utils import Timer
from utils.constants import SAMPLE_RATE

# Data-set roots used by the benchmarks below. Both are relative paths, so they are resolved
# against the working directory of the notebook kernel. Each root is expected to contain one
# sub-directory per instrument, which in turn holds the individual samples.

# Samples that are loaded with librosa.load.
PATH_TO_AUDIO_DATA_SET: str = '../../data-sets/processed_dataset'
# Samples that are loaded with np.load (presumably the same samples saved as NumPy files — verify).
PATH_TO_NUMPY_DATA_SET: str = '../../data-sets/processed_dataset_numpy'

### Librosa Load

In [2]:
# Benchmark librosa: for every instrument directory, time how long librosa.load takes to load
# all of its samples (mixed down to mono at SAMPLE_RATE) and print the total per instrument.
ontology: List[str] = os.listdir(PATH_TO_AUDIO_DATA_SET)

for instrument in ontology:
    path_to_samples: str = join(PATH_TO_AUDIO_DATA_SET, instrument)

    # Skip stray files in the data-set root (e.g. '.DS_Store'); listing them would raise.
    if not os.path.isdir(path_to_samples):
        continue

    # Collect the sample paths before the timer starts, so that the measurement covers only
    # the load calls and not the directory listing / path joining.
    sample_paths: List[str] = [join(path_to_samples, sample) for sample in os.listdir(path_to_samples)]

    with Timer() as timer:
        for path_to_single_sample in sample_paths:  # Loop through all samples and load the data using librosa.
            librosa.load(path_to_single_sample, mono=True, sr=SAMPLE_RATE)

    print(f'{instrument}: {round(timer.elapsed, 4)} seconds')

flute: 4.4712 seconds
guitar: 57.5286 seconds
string: 44.3249 seconds
bass: 18.6763 seconds
brass: 30.5929 seconds
piano: 14.639 seconds
strings: 4.7515 seconds
vocal: 10.3285 seconds
synth: 12.3647 seconds
organ: 94.0101 seconds
reed: 42.8214 seconds


### Numpy Load

In [3]:
# Benchmark numpy: for every instrument directory, time how long np.load takes to load all of
# its samples and print the total per instrument.
# NOTE(review): this assumes the samples are stored as .npy files. For .npz archives np.load
# returns a lazily-read NpzFile, so the array data would not actually be read here and the
# file handles would stay open — confirm the on-disk format.
ontology: List[str] = os.listdir(PATH_TO_NUMPY_DATA_SET)

for instrument in ontology:
    path_to_samples: str = join(PATH_TO_NUMPY_DATA_SET, instrument)

    # Skip stray files in the data-set root (e.g. '.DS_Store'); listing them would raise.
    if not os.path.isdir(path_to_samples):
        continue

    # Collect the sample paths before the timer starts, so that the measurement covers only
    # the load calls and not the directory listing / path joining.
    sample_paths: List[str] = [join(path_to_samples, sample) for sample in os.listdir(path_to_samples)]

    with Timer() as timer:
        for path_to_single_sample in sample_paths:  # Loop through all samples and load the data using numpy.
            np.load(path_to_single_sample)

    print(f'{instrument}: {round(timer.elapsed, 4)} seconds')

flute: 18.8751 seconds
guitar: 94.6811 seconds
string: 66.2246 seconds
bass: 28.1576 seconds
brass: 46.2456 seconds
piano: 22.2618 seconds
strings: 6.5311 seconds
vocal: 15.2019 seconds
synth: 18.6608 seconds
organ: 139.5007 seconds
reed: 66.0808 seconds
