In [1]:
import argparse
import sys
import os
import pickle

from tqdm import tqdm
from joblib import Parallel, delayed

import jams
import pumpp
import librosa

import h5py

  from ._conv import register_converters as _register_converters


In [2]:
# Project directory; make_pump() pickles the pump to 'pump.pkl' inside it.
OUTPUT_PATH = '/scratch/yw3004/projects/deepunet/'

In [2]:
def make_pump(sr, hop_length, n_fft):
    """Build a pumpp ``Pump`` holding one STFT-magnitude extractor and pickle it.

    Parameters
    ----------
    sr
        Sampling rate forwarded to ``pumpp.feature.STFTMag``.
    hop_length
        Hop length forwarded to ``pumpp.feature.STFTMag``.
    n_fft
        FFT size forwarded to ``pumpp.feature.STFTMag``.

    Returns
    -------
    pumpp.Pump
        A pump wrapping the extractor registered under the name ``'stft'``.

    Notes
    -----
    As a side effect the pump is written to ``pump.pkl`` inside the
    module-level ``OUTPUT_PATH`` directory.
    """
    stft_params = dict(sr=sr, hop_length=hop_length, n_fft=n_fft)
    extractor = pumpp.feature.STFTMag(name='stft', log=False, conv='tf',
                                      **stft_params)
    pump = pumpp.Pump(extractor)

    # Persist the pump next to the rest of the project outputs.
    pickle_path = os.path.join(OUTPUT_PATH, 'pump.pkl')
    with open(pickle_path, 'wb') as fd:
        pickle.dump(pump, fd)

    return pump

In [3]:
def normalize(data):
    """Min-max scale ``data`` into the range [0, 1].

    Parameters
    ----------
    data
        Array-like object exposing ``min()``/``max()`` and elementwise
        arithmetic (e.g. a NumPy array).

    Returns
    -------
    Array of the same shape as ``data`` where the smallest input value maps
    to 0 and the largest to 1. If every element of ``data`` is equal, the
    result is all zeros.
    """
    lowest = data.min()
    span = data.max() - lowest

    # A constant input (e.g. a silent clip) has zero range; dividing by it
    # would fill the result with NaN. Divide by 1 instead so the output is 0.
    if span == 0:
        span = 1

    return (data - lowest) / span

In [4]:
def root(x):
    """Return the file name of path ``x`` with its directory and final extension removed."""
    filename = os.path.basename(x)
    stem, _extension = os.path.splitext(filename)
    return stem

In [5]:
def convert(aud_vocal, aud_inst, pump, outdir):
    """Mix a vocal/instrumental pair and store input/target STFT magnitudes.

    Parameters
    ----------
    aud_vocal
        Path to the vocal audio file.
    aud_inst
        Path to the instrumental audio file.
    pump
        Pump whose ``transform`` output contains a ``'stft/mag'`` entry.
    outdir
        Directory that receives the ``.h5`` file; created if missing.

    Notes
    -----
    Writes ``<outdir>/<root(aud_inst)>.h5`` with two entries:
    ``'stft/mag'`` (from the vocal + instrumental mix) and ``'output/mag'``
    (from the instrumental alone). Each array is scaled to [0, 1] by its own
    min/max via ``normalize``.
    """
    # Decode each stem exactly once.
    y_vocal, sr_vocal = librosa.load(aud_vocal)
    y_inst, sr_inst = librosa.load(aud_inst)

    # The two stems can differ in length by a few samples; trim both to the
    # common length so they can be summed and so the mix and the target cover
    # the same span of audio.
    n_samples = min(len(y_vocal), len(y_inst))
    y_vocal = y_vocal[:n_samples]
    y_inst = y_inst[:n_samples]
    y_mix = y_vocal + y_inst

    data_mix = pump.transform(y=y_mix, sr=sr_vocal)
    # Reuse the decoded (and trimmed) instrumental instead of having the pump
    # load the file from disk a second time.
    data_inst = pump.transform(y=y_inst, sr=sr_inst)

    # Normalize each magnitude array to the range [0, 1].
    data = {
        'stft/mag': normalize(data_mix['stft/mag']),
        'output/mag': normalize(data_inst['stft/mag']),
    }

    # Callers pass per-prefix subdirectories that may not exist yet.
    os.makedirs(outdir, exist_ok=True)
    fname = os.path.extsep.join([os.path.join(outdir, root(aud_inst)), 'h5'])

    with h5py.File(fname, 'w') as hf:
        hf.update(**data)

In [7]:
# Base directory for the per-track .h5 files written by convert(); the
# __main__ cell appends the first three characters of each vocal file name as
# a subdirectory. Note this is a different variable from OUTPUT_PATH.
output_path = '/scratch/yw3004/projects/deepunet/pump/'

In [8]:
if __name__ == '__main__':
    # Build the feature pump; make_pump also pickles it under OUTPUT_PATH.
    pump = make_pump(8192, 768, 1024)

    AUDIO_VOCAL = jams.util.find_with_extension(
        '/scratch/yw3004/projects/deepunet/tempaudio/unfinished/vocal/', 'mp3')
    AUDIO_INST = jams.util.find_with_extension(
        '/scratch/yw3004/projects/deepunet/tempaudio/unfinished/instrumental/', 'mp3')

    # The two listings are paired positionally, so they must have the same
    # length and matching base names at every index.
    assert len(AUDIO_VOCAL) == len(AUDIO_INST)
    assert all(root(vocal) == root(inst)
               for vocal, inst in zip(AUDIO_VOCAL, AUDIO_INST))

    # One conversion task per pair; each output goes into a subdirectory named
    # after the first three characters of the vocal file name.
    tasks = (
        delayed(convert)(
            aud_vocal,
            aud_inst,
            pump,
            os.path.join(output_path, os.path.basename(aud_vocal)[:3]),
        )
        for aud_vocal, aud_inst in zip(AUDIO_VOCAL, AUDIO_INST)
    )
    Parallel(n_jobs=20)(tasks)

83481
83480


In [6]:
# Scratch check: rebuild by hand the same STFT extractor configuration that
# the __main__ cell requests through make_pump(8192, 768, 1024).
p_stft = pumpp.feature.STFTMag(name='stft',
                               sr=8192, hop_length=768, n_fft=1024,
                               log=False, conv='tf')

In [7]:
# Wrap the extractor in a Pump. Unlike make_pump(), nothing is pickled here;
# this rebinds the global name `pump`.
pump = pumpp.Pump(p_stft)

In [8]:
# Run the pump directly on one instrumental file path to inspect its output.
data_inst = pump.transform('/scratch/yw3004/projects/deepunet/audio/instrumental/000/000002.mp3')

In [11]:
# Shape of the magnitude array for that file; the recorded output is
# (1, 1791, 513, 1). 513 equals n_fft / 2 + 1 for n_fft=1024; presumably the
# axes are (batch, frames, frequency bins, channel) — TODO confirm.
data_inst['stft/mag'].shape

(1, 1791, 513, 1)