In [2]:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
import multiprocessing
import librosa
import os
from tqdm import tqdm_notebook
import sys
from itertools import islice
from random import shuffle
from sklearn.cluster import MiniBatchKMeans
from time import time
from sklearn.externals import joblib

In [22]:
# Directory containing raw audio files
AUDIO_DIR = '/Volumes/thesis/audio/'
# Directory to write features to
MFCC_WRITE_DIR = 'data/features/mfcc/'
MEL_WRITE_DIR = 'data/features/mel_spec/'
MEL_ALL_WRITE_DIR = '/Volumes/thesis/features/mel_spec_all_padded/'
MFCC_ALL_WRITE_DIR = 'data/features/mfcc_all_unpadded/'
BOW_WRITE_DIR = 'data/features/bow_2000'

In [13]:
BOW_SIZE = 2000

In [19]:
# Load AllMusic data
artists = pd.read_csv('data/allmusic/artists_cleaned.csv')

# Create Mel-frequency cepstral coefficient (MFCC) representations of first track for each artist we have audio for

In [None]:
# key is artist id, value is a MFCC representation of first track for artist
mfcc_dict = {}

In [None]:
for artist in tqdm_notebook(os.listdir(AUDIO_DIR)):
    first_track = None
    
    for track in os.listdir(AUDIO_DIR + artist):
        # Find the first track (zero-indexed)
        if track.startswith('0'):
            first_track = track
            break
    
    # Create MFCC representation of track
    if first_track is not None:
        try:
            y, sr = librosa.load(AUDIO_DIR + '{}/{}'.format(artist, first_track))
            mfcc_dict[artist] = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)    
        except Exception as e:
            print e

In [None]:
# Compute maximum dimensions for mfcc feature matrix
shapes = []

for i, item in mfcc_dict.items():
    shapes.append(item.shape)

max_dim = np.max(shapes, axis=0)

for i, item in mfcc_dict.items():
    # Zero pad so that all matrices are the same size
    padded = np.zeros(max_dim)
    padded[:,:mfcc_dict[i].shape[1]] = mfcc_dict[i]
    mfcc_dict[i] = padded

In [None]:
# Write mfcc arrays to files
for i, mfcc in tqdm_notebook(mfcc_dict.items()):
    np.save(MFCC_WRITE_DIR + '{}.npy'.format(i), mfcc)

# MFCC Extraction for all tracks for all artists

In [None]:
for artist in tqdm_notebook(os.listdir(AUDIO_DIR)):
    # Create directory for each artist if it does not exist yet
    artist_mfcc_path = MFCC_ALL_WRITE_DIR + artist
    
    if not os.path.isdir(artist_mfcc_path):
        os.makedirs(artist_mfcc_path)
    
    for track in os.listdir(AUDIO_DIR + artist):
        # Create MFCC representation of track
        try:
            y, sr = librosa.load(AUDIO_DIR + '{}/{}'.format(artist, track))
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            np.save(artist_mfcc_path + '/{}.npy'.format(track.decode('utf-8').split('.mp3')[0].encode('utf-8')), mfcc)
        except Exception as e:
            print artist, track
            print e
            

## Calculate mean and standard deviation of each MFCC to normalize

In [6]:
frame_count = 0
mfcc_sum = np.zeros(13,)

for artist in tqdm_notebook(os.listdir(MFCC_ALL_WRITE_DIR)):
    for song in os.listdir(MFCC_ALL_WRITE_DIR + artist):
        mfcc = np.load(MFCC_ALL_WRITE_DIR + artist + '/' + song)
        mfcc_sum += mfcc.sum(axis=1)
        frame_count += mfcc.shape[1]

A Jupyter Widget




In [11]:
mfcc_means

array([-134.21148862,  105.97183766,  -10.34393744,   34.95313303,
         -2.4342333 ,   13.40578362,   -5.42583774,    8.09681016,
         -6.18242169,    4.73996903,   -4.18510326,    1.60272867,
         -3.045377  ])

In [None]:
np.save('kmeans_helpers/mfcc_means.npy', mfcc_means)

In [13]:
sq_dev_sum = np.zeros(13,)

for artist in tqdm_notebook(os.listdir(MFCC_ALL_WRITE_DIR)):
    for song in os.listdir(MFCC_ALL_WRITE_DIR + artist):
        mfcc = np.load(MFCC_ALL_WRITE_DIR + artist + '/' + song)
        sq_dev_sum += ((mfcc.T - mfcc_means.T).T ** 2).sum(axis=1).reshape(-1, 1)
        
mfcc_stds = np.sqrt(sq_dev_sum / frame_count)

A Jupyter Widget




ValueError: non-broadcastable output operand with shape (13,) doesn't match the broadcast shape (13,13)

In [None]:
np.save('kmeans_helpers/mfcc_stds.npy', mfcc_stds)

## Create codebook for normalized MFCCs via streaming kmeans clustering

In [15]:
mfcc_means = np.load('kmeans_helpers/mfcc_means.npy')
mfcc_stds = np.load('kmeans_helpers/mfcc_stds.npy')

In [10]:
paths = []

for artist in os.listdir(MFCC_ALL_WRITE_DIR):
    for song in os.listdir(MFCC_ALL_WRITE_DIR + artist):
        paths.append(MFCC_ALL_WRITE_DIR + artist + '/' + song)
        
# Shuffle path names
shuffle(paths)

In [11]:
def generate_batch(paths, batch_size=1000):
    """Given an iterable of paths to mfcc vectors, generate batches for streaming"""
    l = len(paths)
    for ndx in range(0, l, batch_size):
        yield paths[ndx:min(ndx + batch_size, l)]

In [12]:
kmeans = MiniBatchKMeans(n_clusters=BOW_SIZE)
count = 0

for batch in generate_batch(paths, 1000):
    count += 1
    print "Fitting batch", count
    
    X = []
    
    # Read in mfcc vectors and normalize
    for path in batch:
        # Shape is (13, num_frames)
        mfcc = np.load(path)
        # Normalize by subtracting mean and dividing by std_dev
        mfcc_norm = (mfcc.T - mfcc_means) / mfcc_stds
        
        for frame in mfcc_norm:
            X.append(frame)
    
    # Update kmeans using batch
    kmeans.partial_fit(X)

Fitting batch 1
Fitting batch 2
Fitting batch 3
Fitting batch 4
Fitting batch 5
Fitting batch 6
Fitting batch 7
Fitting batch 8
Fitting batch 9
Fitting batch 10
Fitting batch 11
Fitting batch 12
Fitting batch 13
Fitting batch 14
Fitting batch 15
Fitting batch 16
Fitting batch 17
Fitting batch 18
Fitting batch 19
Fitting batch 20
Fitting batch 21
Fitting batch 22
Fitting batch 23
Fitting batch 24
Fitting batch 25
Fitting batch 26
Fitting batch 27
Fitting batch 28
Fitting batch 29
Fitting batch 30
Fitting batch 31
Fitting batch 32
Fitting batch 33
Fitting batch 34
Fitting batch 35
Fitting batch 36
Fitting batch 37
Fitting batch 38
Fitting batch 39
Fitting batch 40
Fitting batch 41
Fitting batch 42
Fitting batch 43
Fitting batch 44
Fitting batch 45
Fitting batch 46
Fitting batch 47
Fitting batch 48
Fitting batch 49
Fitting batch 50
Fitting batch 51
Fitting batch 52
Fitting batch 53
Fitting batch 54
Fitting batch 55
Fitting batch 56
Fitting batch 57
Fitting batch 58
Fitting batch 59
Fittin

In [None]:
joblib.dump(kmeans, 'kmeans_helpers/kmeans_2000.pkl')

['kmeans_helpers/kmeans_2000.pkl']

# Bag of Words Representation for MFCC features using Kmeans quantization

In [3]:
kmeans = joblib.load('kmeans_helpers/kmeans_2000.pkl')

In [23]:
for artist in tqdm_notebook(os.listdir(MFCC_ALL_WRITE_DIR)):
    # Create directory for each artist if it does not exist yet
    artist_bow_path = BOW_WRITE_DIR + artist
    
    if not os.path.isdir(artist_bow_path):
        os.makedirs(artist_bow_path)
    
    for song in os.listdir(MFCC_ALL_WRITE_DIR + artist):
        try:
            X = []
            bow = [0 for _ in range(BOW_SIZE)]

            mfcc = np.load(MFCC_ALL_WRITE_DIR + artist + '/' + song)
            # Normalize by subtracting mean and dividing by std_dev
            mfcc_norm = (mfcc.T - mfcc_means) / mfcc_stds

            for frame in mfcc_norm:
                X.append(frame)

            # Give cluster assignments for each frame
            cluster_assign = kmeans.predict(X)
            for cluster in cluster_assign:
                bow[cluster] += 1

            # Save bow feature representation
            np.save(BOW_WRITE_DIR + artist + '/{}.npy'.format(song.decode('utf-8').split('.npy')[0].encode('utf-8')), bow)
        except Exception as e:
            print e

A Jupyter Widget

# Create Mel Spectrogram representations for each first track we have audio for

In [None]:
for artist in tqdm_notebook(os.listdir(AUDIO_DIR)):
    first_track = None
    
    for track in os.listdir(AUDIO_DIR + artist):
        # Find the first track (zero-indexed)
        if track.startswith('0'):
            first_track = track
            break
    
    # Create mel representation of track
    if first_track is not None:
        try:
            y, sr = librosa.load(AUDIO_DIR + '{}/{}'.format(artist, first_track))
            mel_spec = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)
            # Add zero padding
            padded = np.zeros((128, 1298))
            padded[:,:mel_spec.shape[1]] = mel_spec
            np.save(MEL_WRITE_DIR + '{}.npy'.format(artist), padded)
        except Exception as e:
            print e

# Create Mel Spectrogram representations for each track

In [21]:
for artist in tqdm_notebook(os.listdir(AUDIO_DIR)):
    # Create directory for each artist if it does not exist yet
    artist_mel_path = MEL_ALL_WRITE_DIR + artist
    
    if not os.path.isdir(artist_mel_path):
        os.makedirs(artist_mel_path)

    for track in os.listdir(AUDIO_DIR + artist):
        try:
            # Create mel representation of track
            y, sr = librosa.load(AUDIO_DIR + '{}/{}'.format(artist, track))
            mel_spec = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)
            # Add zero padding
            padded = np.zeros((128, 1298))
            padded[:,:mel_spec.shape[1]] = mel_spec
            np.save(MEL_ALL_WRITE_DIR + artist + '/{}.npy'.format(os.path.splitext(track)[0]), padded)
        except Exception as e:
            print artist
            print song
            print e

A Jupyter Widget

KeyboardInterrupt: 

In [5]:
# Parallelized version
inputs = os.listdir(AUDIO_DIR)

def create_mel_spec(artist):
    artist_mel_path = MEL_ALL_WRITE_DIR + artist
    
    if not os.path.isdir(artist_mel_path):
        os.makedirs(artist_mel_path)

    for track in os.listdir(AUDIO_DIR + artist):
        try:
            # Create mel representation of track
            y, sr = librosa.load(AUDIO_DIR + '{}/{}'.format(artist, track))
            mel_spec = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)
            # Add zero padding
            padded = np.zeros((128, 1298))
            padded[:,:mel_spec.shape[1]] = mel_spec
            np.save(MEL_ALL_WRITE_DIR + artist + '/{}.npy'.format(os.path.splitext(track)[0]), padded)
        except Exception as e:
            print artist
            print track
            print e

start = time()
Parallel(n_jobs=multiprocessing.cpu_count())(delayed(create_mel_spec)(artist) for artist in inputs)
print time() - start

0000021009
3_No More Heroes.mp3

0000033286
1_Trip to New Orleans.mp3

0000036367
6_The Great Out There.mp3

0000053204
3_Não Quero Saber Mais Dela.mp3

0000057073
0_Poison in Your Brain.mp3

0000059537
9_Saturday Freedom.mp3

0000070349
3_It Will Be Alright With Me.mp3

0000071514
2_Fussing and Fighting.mp3

0000078160
7_New Forms.mp3

0000082831
3_I'm Making Believe.mp3

0000083097
1_Break the Silence.mp3

0000102964
8_Fuck America.mp3

0000104035
8_That's Where My Money Goes.mp3

0000107139
3_Don't Let 'Em.mp3

0000125528
9_Cocaine Cool [Extended Vol. 2].mp3

0000127044
4_Jeepers Creepers.mp3

0000157314
5_Throw Your Hands.mp3

0000159052
5_The Weight.mp3

0000159697
4_Ten Toes Down.mp3

0000161173
8_Interviews.mp3

0000169341
2_Menage a Trois.mp3

0000178345
5_Don't Wann Fall in Love.mp3

0000178852
3_Gotta Get Mine.mp3

0000180228
3_Rip It Up.mp3

0000190951
9_Atomic Bass Cats.mp3

0000194875
4_Sleepin' on My Couch.mp3

0000195868
5_Funk Yard.mp3

0000198783
1_I'm Coming Home.mp3

JoblibOSError: JoblibOSError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = '/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = ''
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x10617e130, file "/Use...2.7/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from '/Users/harry...python2.7/site-packages/ipykernel/kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}, init_globals=None, mod_name='__main__', mod_fname='/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x10617e130, file "/Use...2.7/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from '/Users/harry...python2.7/site-packages/ipykernel/kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    472             return self.subapp.start()
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         try:
--> 477             ioloop.IOLoop.instance().start()
    478         except KeyboardInterrupt:
    479             pass
    480 
    481 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 2, 23, 0, 47, 7, 838430, tzinfo=tzutc()), u'msg_id': u'629365E2C2A9474F9B3053EF217D9D00', u'msg_type': u'execute_request', u'session': u'97C829145B1F46CC859C783764DE13E6', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'629365E2C2A9474F9B3053EF217D9D00', 'msg_type': u'execute_request', 'parent_header': {}})
    230             self.log.warn("Unknown message type: %r", msg_type)
    231         else:
    232             self.log.debug("%s: %s", msg_type, msg)
    233             self.pre_handler_hook()
    234             try:
--> 235                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['97C829145B1F46CC859C783764DE13E6']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 2, 23, 0, 47, 7, 838430, tzinfo=tzutc()), u'msg_id': u'629365E2C2A9474F9B3053EF217D9D00', u'msg_type': u'execute_request', u'session': u'97C829145B1F46CC859C783764DE13E6', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'629365E2C2A9474F9B3053EF217D9D00', 'msg_type': u'execute_request', 'parent_header': {}}
    236             except Exception:
    237                 self.log.error("Exception in message handler:", exc_info=True)
    238             finally:
    239                 self.post_handler_hook()

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['97C829145B1F46CC859C783764DE13E6'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 2, 23, 0, 47, 7, 838430, tzinfo=tzutc()), u'msg_id': u'629365E2C2A9474F9B3053EF217D9D00', u'msg_type': u'execute_request', u'session': u'97C829145B1F46CC859C783764DE13E6', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'629365E2C2A9474F9B3053EF217D9D00', 'msg_type': u'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start"
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start",), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start",)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start", store_history=True, silent=False, shell_futures=True)
   2713                 self.displayhook.exec_result = result
   2714 
   2715                 # Execute the user code
   2716                 interactivity = "none" if silent else self.ast_node_interactivity
   2717                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2718                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2719                 
   2720                 self.last_execution_succeeded = not has_raised
   2721 
   2722                 # Reset this so later displayed values do not modify the

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.FunctionDef object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Print object>], cell_name='<ipython-input-5-689fbbcd9013>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at 107fe9b90, execution_..._before_exec=None error_in_exec=None result=None>)
   2817 
   2818         try:
   2819             for i, node in enumerate(to_run_exec):
   2820                 mod = ast.Module([node])
   2821                 code = compiler(mod, cell_name, "exec")
-> 2822                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x10ed270b0, file "<ipython-input-5-689fbbcd9013>", line 25>
        result = <ExecutionResult object at 107fe9b90, execution_..._before_exec=None error_in_exec=None result=None>
   2823                     return True
   2824 
   2825             for i, node in enumerate(to_run_interactive):
   2826                 mod = ast.Interactive([node])

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x10ed270b0, file "<ipython-input-5-689fbbcd9013>", line 25>, result=<ExecutionResult object at 107fe9b90, execution_..._before_exec=None error_in_exec=None result=None>)
   2877         outflag = 1  # happens in more places, so it's easier as default
   2878         try:
   2879             try:
   2880                 self.hooks.pre_run_code_hook()
   2881                 #rprint('Running code', repr(code_obj)) # dbg
-> 2882                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x10ed270b0, file "<ipython-input-5-689fbbcd9013>", line 25>
        self.user_global_ns = {'AUDIO_DIR': '/Volumes/thesis/audio/', 'BOW_WRITE_DIR': 'data/features/bow_1000/', 'In': ['', u'\nimport numpy as np\nimport pandas as pd\nfro...mport time\nfrom sklearn.externals import joblib', u"# Directory containing raw audio files\nAUDIO_...ded/'\nBOW_WRITE_DIR = 'data/features/bow_1000/'", u"# Directory containing raw audio files\nAUDIO_...ded/'\nBOW_WRITE_DIR = 'data/features/bow_1000/'", u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start", u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start"], 'MEL_ALL_WRITE_DIR': '/Volumes/thesis/features/mel_spec_all_padded/', 'MEL_WRITE_DIR': 'data/features/mel_spec/', 'MFCC_ALL_WRITE_DIR': 'data/features/mfcc_all_unpadded/', 'MFCC_WRITE_DIR': 'data/features/mfcc/', 'MiniBatchKMeans': <class 'sklearn.cluster.k_means_.MiniBatchKMeans'>, 'Out': {}, 'Parallel': <class 'joblib.parallel.Parallel'>, ...}
        self.user_ns = {'AUDIO_DIR': '/Volumes/thesis/audio/', 'BOW_WRITE_DIR': 'data/features/bow_1000/', 'In': ['', u'\nimport numpy as np\nimport pandas as pd\nfro...mport time\nfrom sklearn.externals import joblib', u"# Directory containing raw audio files\nAUDIO_...ded/'\nBOW_WRITE_DIR = 'data/features/bow_1000/'", u"# Directory containing raw audio files\nAUDIO_...ded/'\nBOW_WRITE_DIR = 'data/features/bow_1000/'", u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start", u"# Parallelized version\ninputs = os.listdir(AU...ist) for artist in inputs)\nprint time() - start"], 'MEL_ALL_WRITE_DIR': '/Volumes/thesis/features/mel_spec_all_padded/', 'MEL_WRITE_DIR': 'data/features/mel_spec/', 'MFCC_ALL_WRITE_DIR': 'data/features/mfcc_all_unpadded/', 'MFCC_WRITE_DIR': 'data/features/mfcc/', 'MiniBatchKMeans': <class 'sklearn.cluster.k_means_.MiniBatchKMeans'>, 'Out': {}, 'Parallel': <class 'joblib.parallel.Parallel'>, ...}
   2883             finally:
   2884                 # Reset our crash handler in place
   2885                 sys.excepthook = old_excepthook
   2886         except SystemExit as e:

...........................................................................
/Users/harryxue/Desktop/music_influence/<ipython-input-5-689fbbcd9013> in <module>()
     20             print artist
     21             print track
     22             print e
     23 
     24 start = time()
---> 25 Parallel(n_jobs=multiprocessing.cpu_count())(delayed(create_mel_spec)(artist) for artist in inputs)
     26 print time() - start

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/joblib/parallel.py in __call__(self=Parallel(n_jobs=4), iterable=<generator object <genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=4)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
OSError                                            Fri Feb 23 09:48:28 2018
PID: 1679               Python 2.7.13: /Users/harryxue/anaconda2/bin/python
...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/site-packages/joblib/parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function create_mel_spec>
        args = ('0000890799',)
        kwargs = {}
        self.items = [(<function create_mel_spec>, ('0000890799',), {})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/Users/harryxue/Desktop/music_influence/<ipython-input-5-689fbbcd9013> in create_mel_spec(artist='0000890799')
      3 
      4 def create_mel_spec(artist):
      5     artist_mel_path = MEL_ALL_WRITE_DIR + artist
      6     
      7     if not os.path.isdir(artist_mel_path):
----> 8         os.makedirs(artist_mel_path)
      9 
     10     for track in os.listdir(AUDIO_DIR + artist):
     11         try:
     12             # Create mel representation of track

...........................................................................
/Users/harryxue/anaconda2/lib/python2.7/os.py in makedirs(name='/Volumes/thesis/features/mel_spec_all_padded/0000890799', mode=511)
    152             # be happy if someone already created the path
    153             if e.errno != errno.EEXIST:
    154                 raise
    155         if tail == curdir:           # xxx/newdir/. exists if xxx/newdir exists
    156             return
--> 157     mkdir(name, mode)
        name = '/Volumes/thesis/features/mel_spec_all_padded/0000890799'
        mode = 511
    158 
    159 def removedirs(name):
    160     """removedirs(path)
    161 

OSError: [Errno 28] No space left on device: '/Volumes/thesis/features/mel_spec_all_padded/0000890799'
___________________________________________________________________________