In [49]:
#!/usr/bin/env python

import argparse
import sys
import os
import pickle

from tqdm import tqdm
from joblib import Parallel, delayed

from jams.util import smkdirs
import pumpp

import h5py

  from ._conv import register_converters as _register_converters


In [51]:
# Directory that receives the pickled pump (`pump.pkl`) written by make_pump;
# the script entry point creates it before make_pump is called.
OUTPUT_PATH = 'resources'

In [46]:
def process_arguments(args):
    """Parse the command-line arguments of the pre-processing script.

    Parameters
    ----------
    args : list of str
        Argument strings to parse, e.g. ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        Parsed values exposed as ``sr``, ``hop_length``, ``nfft``,
        ``n_jobs``, ``input_path`` and ``output_path``.
    """
    cli = argparse.ArgumentParser(description=__doc__)

    # Optional analysis settings, registered in declaration order so the
    # generated help text keeps the same layout.
    optional_specs = (
        ('--sample-rate', dict(dest='sr', type=float, default=44100.,
                               help='Sampling rate for audio analysis')),
        ('--hop-length', dict(dest='hop_length', type=int, default=512,
                              help='Hop length for audio analysis')),
        ('--nfft', dict(dest='nfft', type=int, default=4096,
                        help='Number of FFT for STFT')),
        ('--jobs', dict(dest='n_jobs', type=int, default=1,
                        help='Number of jobs to run in parallel')),
    )
    for flag, spec in optional_specs:
        cli.add_argument(flag, **spec)

    # Required positional paths: (name, help text).
    positional_specs = (
        ('input_path', 'Path for directory containing (audio, jams)'),
        ('output_path', 'Path to store pump output'),
    )
    for name, help_text in positional_specs:
        cli.add_argument(name, type=str, help=help_text)

    return cli.parse_args(args)

In [48]:
def root(x):
    """Return the base name of path ``x`` without its final extension.

    The directory part is dropped first, then only the last extension is
    removed (so ``'a/b.tar.gz'`` yields ``'b.tar'``).
    """
    filename = os.path.basename(x)
    stem, _extension = os.path.splitext(filename)
    return stem

In [47]:
def make_pump(sr, hop_length, n_fft):
    """Build a pump holding one STFT-magnitude extractor and pickle it.

    The pump is serialized to ``pump.pkl`` inside ``OUTPUT_PATH`` before it
    is returned. The directory is not created here, so it must already exist.

    Parameters
    ----------
    sr
        Sampling rate forwarded to the STFT extractor.
    hop_length
        Hop length forwarded to the STFT extractor.
    n_fft
        FFT size forwarded to the STFT extractor.

    Returns
    -------
    pumpp.Pump
        The newly constructed pump.
    """
    stft_options = dict(name='stft',
                        sr=sr, hop_length=hop_length, n_fft=n_fft,
                        log=False, conv='tf')
    pump = pumpp.Pump(pumpp.feature.STFTMag(**stft_options))

    # Persist the pump so the same configuration can be reloaded later
    pickle_path = os.path.join(OUTPUT_PATH, 'pump.pkl')
    with open(pickle_path, 'wb') as fd:
        pickle.dump(pump, fd)

    return pump

In [50]:
def convert(aud_mix, aud_inst, pump, outdir):
    """Run ``pump`` on two input files and store the combined result as HDF5.

    Both files are transformed with the same pump. The ``'stft/mag'`` entry
    of the second result is added to the first result under ``'output/mag'``,
    and the merged mapping is written to ``<outdir>/<root(aud_mix)>.h5``.

    Parameters
    ----------
    aud_mix
        First input passed to ``pump.transform``; its base name also names
        the output file. Presumably the mixture audio, going by the
        parameter name -- confirm against the caller.
    aud_inst
        Second input passed to ``pump.transform``. Presumably the isolated
        instrument audio, going by the parameter name -- confirm.
    pump
        Object providing ``transform``, e.g. the pump from ``make_pump``.
    outdir
        Directory in which the ``.h5`` file is created.
    """
    features = pump.transform(aud_mix)
    target = pump.transform(aud_inst)
    features['output/mag'] = target['stft/mag']

    # <outdir>/<basename without extension> + '.' + 'h5'
    out_stem = os.path.join(outdir, root(aud_mix))
    h5_path = out_stem + os.path.extsep + 'h5'

    with h5py.File(h5_path, 'w') as hf:
        hf.update(**features)

In [None]:
if __name__ == '__main__':
    # `crema.utils.get_ann_audio` is used below, but `crema` is not imported
    # anywhere in the visible file, so the lookup would raise NameError.
    # Import it locally so the entry point is self-contained.
    import crema.utils

    params = process_arguments(sys.argv[1:])

    # Make sure both destinations exist: OUTPUT_PATH receives the pickled
    # pump, params.output_path receives the per-file .h5 outputs.
    smkdirs(OUTPUT_PATH)
    smkdirs(params.output_path)

    # NOTE(review): no module docstring is visible in this file; if there is
    # none, __doc__ is None and this prints 'None: pre-processing'.
    print('{}: pre-processing'.format(__doc__))
    print(params)
    pump = make_pump(params.sr, params.hop_length, params.nfft)

    # NOTE(review): get_ann_audio is assumed to yield 2-tuples that line up
    # with convert's (aud_mix, aud_inst) parameters -- confirm that the
    # second item is something pump.transform can consume.
    stream = tqdm(crema.utils.get_ann_audio(params.input_path),
                  desc='Converting training data')

    # Convert every pair, optionally across several worker processes.
    Parallel(n_jobs=params.n_jobs)(delayed(convert)(aud, ann,
                                                    pump,
                                                    params.output_path)
                                   for aud, ann in stream)