This tool converts a folder of samples to a big rectangular matrix with one mono sample per row.

Samples should be placed in `data/mydataset/samples/`. They can be `.mp3`, `.wav`, or anything else that ffmpeg can work with (note, however, that the file-collection code below only picks up files with a `.wav` extension). They may be all in one folder, or in nested sub-folders.

Change the path below to point to the root directory, e.g., `data/mydataset/`.

The samplerate `sr` is not necessarily the native samplerate of the samples; it is the samplerate you want to load them at.

The output of this notebook is:
* `data/mydataset/durations.txt`
* `data/mydataset/filenames.txt`
* `data/mydataset/samples.npy`

In [1]:
import os
import re
import string
import numpy as np
from os.path import join
from utils import *
from multiprocessing import Pool
import pickle


# Root folder that is searched recursively for samples
sampleRootDirectory = os.path.expanduser("~/Documents/Samples")

# The names of the drum classes. Each name is turned into a regular expression
# further down, where non-letters are copied verbatim -- so the '.' in
# "hi.hat" acts as a regex wildcard.
drumNames = ["kick", "tom", "snare", "clap", "hi.hat", "ride", "crash"]


# Collect all of the sample file paths as strings.
# The extension test is case-insensitive so that files named e.g. "Kick.WAV"
# are not silently left out.
fileNames = []
for dirPath, subDirs, baseNames in os.walk(sampleRootDirectory):
    for baseName in baseNames:
        if baseName.lower().endswith('.wav'):
            fileNames.append(join(dirPath, baseName))

            
# We assign classes to samples by using regular expressions to see
# if the sample file path contains the class name


def makeRegex(drumStr):
    """Build the pattern used to match a drum class name against a file path.

    Every letter of `drumStr` becomes a two-character class holding its
    lower- and upper-case form, which makes the match case-insensitive no
    matter how the class name itself is capitalised. Any other character is
    copied into the pattern unchanged, so '.' keeps its regex meaning of
    "any single character". The result is wrapped in '.*' on both sides so
    that an anchored match still finds the name anywhere in the path.
    """
    core = "".join(
        '[' + c.lower() + c.upper() + ']' if c.isalpha() else c
        for c in drumStr
    )
    return '.*' + core + '.*'
# One pattern per drum class, kept in the same order as drumNames
drumRegex = list(map(makeRegex, drumNames))


# Group the file paths by class: every class name maps to the set of
# paths that its pattern matches
drumFileSets = {
    drumName: {path for path in fileNames if re.match(pattern, path)}
    for drumName, pattern in zip(drumNames, drumRegex)
}
    

# Find every pair of classes that share at least one sample. All overlaps
# are recorded before anything is removed, so each one is computed on the
# unmodified sets.
intersections = []
for i, d1 in enumerate(drumNames):
    for d2 in drumNames[i + 1:]:
        shared = drumFileSets[d1] & drumFileSets[d2]
        if shared:
            intersections.append([d1, d2, shared])

# Remove the ambiguous samples from both classes of each overlapping pair.
# note - for some classes, this significantly reduces the number of samples
for d1, d2, shared in intersections:
    drumFileSets[d1] = drumFileSets[d1].difference(shared)
    drumFileSets[d2] = drumFileSets[d2].difference(shared)

In [2]:
data_root = 'drumData'  # folder the per-class output files are written to
sr = 48000  # samplerate the samples are loaded at
max_length = sr * 4  # ignore samples longer than 4 seconds
# trim (or zero-pad) all samples to 250 milliseconds; floor division keeps
# this an integer sample count even where `/` means true division
fixed_length = sr // 4
limit = None  # set this to 100 to only load the first 100 samples

In [3]:
# function to extract audio data from files
def load_sample(fn, sr=None,
                max_length=None, fixed_length=None, normalize=True):
    """Load one audio file as mono and optionally bring it to a fixed length.

    Parameters:
        fn: path of the file to load; an empty string is skipped.
        sr: samplerate handed on to `ffmpeg_load_audio`.
        max_length: if set, files with this many samples or more are skipped.
        fixed_length: if set, the audio is cut off or zero-padded to this
            many samples (a float such as 12000.0 is truncated to an int).
        normalize: if true, divide the audio by its peak absolute value.

    Returns:
        `(fn, audio, duration)`, where `duration` is the number of samples
        loaded before any trimming/padding, or `None` when the file is
        skipped (empty name, zero length, too long, or silent in the part
        that is kept).
    """
    if fn == '':  # ignore empty filenames
        return None
    audio, _ = ffmpeg_load_audio(fn, sr, mono=True)
    duration = len(audio)
    if duration == 0:  # ignore zero-length samples
        return None
    if max_length and duration >= max_length:  # ignore long samples
        return None
    if fixed_length:
        # Copy into a fresh zero-filled buffer rather than calling
        # ndarray.resize(): resize() insists on an integer size and raises
        # ValueError when the array does not own its data or is referenced
        # elsewhere. The result (truncate or pad with zeros) is the same.
        target = int(fixed_length)
        flat = np.ravel(audio)
        kept = min(flat.size, target)
        fitted = np.zeros(target, dtype=flat.dtype)
        fitted[:kept] = flat[:kept]
        audio = fitted
    max_val = np.abs(audio).max()
    if max_val == 0:  # ignore completely silent sounds
        return None
    if normalize:
        audio /= max_val
    return (fn, audio, duration)

In [5]:
# perform extraction of audio data from files
drumSampleSets = {}
for drumName in drumNames:
    files = list(drumFileSets[drumName])
    def job(fn):
        return load_sample(fn, sr=sr,
                           max_length=max_length, fixed_length=fixed_length)
    pool = Pool()
    %time drumSampleSets[drumName] = pool.map(job, files[:limit])
    print 'Processed', len(drumSampleSets[drumName]), 'samples for ', drumName

CPU times: user 195 ms, sys: 322 ms, total: 517 ms
Wall time: 22.9 s
 Processed 5395 samples for  kick
CPU times: user 18.9 ms, sys: 19.7 ms, total: 38.6 ms
Wall time: 3.05 s
Processed 529 samples for  tom
CPU times: user 117 ms, sys: 135 ms, total: 252 ms
Wall time: 12.8 s
Processed 2563 samples for  snare
CPU times: user 62.1 ms, sys: 53.7 ms, total: 116 ms
Wall time: 6.74 s
Processed 1332 samples for  clap
CPU times: user 9.74 ms, sys: 8.73 ms, total: 18.5 ms
Wall time: 880 ms
Processed 167 samples for  hi.hat
CPU times: user 15.4 ms, sys: 14 ms, total: 29.4 ms
Wall time: 1.59 s
Processed 250 samples for  ride
CPU times: user 54.3 ms, sys: 43.1 ms, total: 97.4 ms
Wall time: 7.01 s
Processed 998 samples for  crash


In [6]:
# save audio data as numpy arrays

drumLengths = []

for drumName in drumNames:
    valid = filter(None, drumSampleSets[drumName])
    filenames = [x[0] for x in valid]
    samples = [x[1] for x in valid]
    durations = [x[2] for x in valid]
    samples = np.asarray(samples)
    drumLengths.append(len(samples))
    np.savetxt(join(data_root, drumName+'_filenames.txt'), filenames, fmt='%s')
    np.savetxt(join(data_root, drumName+'_durations.txt'), durations, fmt='%i')
    %time np.save(join(data_root, drumName+'_samples.npy'), samples)
    print 'Saved', len(valid), 'samples of '+drumName

pickle.dump(drumNames, open(data_root+"/drumNames.pickle", "w"))
pickle.dump(drumLengths, open(data_root+"/drumLengths.pickle", "w"))

CPU times: user 9.6 ms, sys: 310 ms, total: 320 ms
Wall time: 423 ms
Saved 5158 samples of kick
CPU times: user 1e+03 µs, sys: 23.7 ms, total: 24.7 ms
Wall time: 32.5 ms
Saved 422 samples of tom
CPU times: user 5.28 ms, sys: 146 ms, total: 152 ms
Wall time: 196 ms
Saved 2546 samples of snare
CPU times: user 2.55 ms, sys: 73.9 ms, total: 76.5 ms
Wall time: 89.9 ms
Saved 1324 samples of clap
CPU times: user 419 µs, sys: 7.82 ms, total: 8.24 ms
Wall time: 14.7 ms
Saved 159 samples of hi.hat
CPU times: user 497 µs, sys: 11.4 ms, total: 11.9 ms
Wall time: 19 ms
Saved 228 samples of ride
CPU times: user 1.51 ms, sys: 47.4 ms, total: 48.9 ms
Wall time: 75.9 ms
Saved 723 samples of crash
