In [6]:
# Feature extraction example
import numpy as np
import librosa

# Load the example clip bundled with librosa.
# NOTE(review): example_audio_file() is reported as deprecated in newer
# librosa releases in favour of librosa.ex(...) — confirm against the
# installed version before upgrading.
y, sr = librosa.load(librosa.util.example_audio_file())

# Hop length shared by every frame-based step below, so that the beat frame
# indices and the feature columns all refer to the same time grid.
# At 22050 Hz, 512 samples ~= 23ms
hop_length = 512

# Separate harmonics and percussives into two waveforms
y_harmonic, y_percussive = librosa.effects.hpss(y)

# Beat track on the percussive signal.
# hop_length is passed explicitly so beat_frames stays aligned with the
# features below if the value above is ever changed.
tempo, beat_frames = librosa.beat.beat_track(y=y_percussive,
                                             sr=sr,
                                             hop_length=hop_length)

# Compute MFCC features from the raw signal
mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)

# And the first-order differences (delta features)
mfcc_delta = librosa.feature.delta(mfcc)

# Stack MFCCs with their deltas and synchronize between beat events.
# No aggregate is given here, so the default (mean) is used.
beat_mfcc_delta = librosa.util.sync(np.vstack([mfcc, mfcc_delta]),
                                    beat_frames)

# Compute chroma features from the harmonic signal, on the same hop length
# as the MFCCs and the beat tracker.
chromagram = librosa.feature.chroma_cqt(y=y_harmonic,
                                        sr=sr,
                                        hop_length=hop_length)

# Aggregate chroma features between beat events.
# Unlike the MFCC block, this uses the median of each feature between
# beat frames.
beat_chroma = librosa.util.sync(chromagram,
                                beat_frames,
                                aggregate=np.median)

# Finally, stack all beat-synchronous features together
beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

In [7]:
# Show the MFCC matrix (NumPy abbreviates large arrays with "...")
print(mfcc)

[[-522.94530523 -494.40971746 -402.84208817 ... -522.94530523
  -522.94530523 -522.94530523]
 [   0.           37.86774603  125.67502994 ...    0.
     0.            0.        ]
 [   0.           31.91965924   48.4149885  ...    0.
     0.            0.        ]
 ...
 [   0.            5.12084665    7.43777421 ...    0.
     0.            0.        ]
 [   0.            1.92254366    5.80279028 ...    0.
     0.            0.        ]
 [   0.            1.47176985    4.57520636 ...    0.
     0.            0.        ]]


In [8]:
# Show the chroma features computed from the harmonic signal
# (NumPy abbreviates large arrays with "...")
print(chromagram)

[[0.39551519 0.35764096 0.26434521 ... 0.41130082 0.50761292 0.39690243]
 [0.76212404 0.36956236 0.1708703  ... 0.42636087 0.53628378 0.80657137]
 [0.8200356  0.62771595 0.33674228 ... 0.43594053 0.72355858 0.70965668]
 ...
 [0.05291524 0.13488453 0.08384524 ... 0.19609585 0.212261   0.19203956]
 [0.28944725 0.22769627 0.12337843 ... 0.2924722  0.28554613 0.23155668]
 [0.3712201  0.292441   0.21141882 ... 0.35494624 0.41183826 0.5145329 ]]
