In [35]:
%pylab inline
from __future__ import print_function
import pickle
import os
import IPython

import numpy as np
import scipy
import sklearn.mixture

# Path for all pre-computed chroma
DATA_DIR = '/home/py/projects/dataset/beatles/beatchromlabs/'

Populating the interactive namespace from numpy and matplotlib


In [36]:
# Read in the list of training file IDs.
def read_file_list(filename):
    """Read a text file with one item per line."""
    items = []
    with open(filename, 'r') as f:
        for line in f:
            items.append(line.strip())
    return items

def read_beat_chroma_labels(file_id):
    """Read back a precomputed beat-synchronous chroma record."""
    filename = os.path.join(os.path.join(DATA_DIR, 'beatchromlabs', file_id + '.pkl'))
    with open(filename, "rb") as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        beat_times, chroma_features, label_indices = u.load()
        # beat_times, chroma_features, label_indices = pickle.load(f)
    #chroma_features = chroma_features**0.25
    chroma_features /= np.maximum(0.01, np.max(chroma_features, axis=1))[:, np.newaxis]
    return beat_times, chroma_features, label_indices

def load_all_features_labels(train_ids):
    """Load all the features and labels from a lit into big arrays."""
    features = []
    labels = []
    for train_id in train_ids:
        beat_times, chroma, label = read_beat_chroma_labels(train_id)
        assert not np.any(np.isnan(chroma))
        features.append(chroma)
        labels.append(label)
    features = np.concatenate(features)
    labels = np.concatenate(labels)
    print('Training features shape:', features.shape)
    return features, labels

def estimate_transitions(labels, num_models):
    # Count the number of transitions in the label set.
    # Each element of gtt is a 4 digit number indicating one transition 
    # e.g. 2400 for 24 -> 0.
    hashed_transitions = 100*labels[:-1] + labels[1:]
    # Arrange these into the transition matrix by counting each type.
    transitions = np.zeros((num_models, num_models))
    for i in range(num_models):
        for j in range(num_models):
            transition_hash = 100 * i + j 
            # Add one to all counts, so no transitions have zero 
            # probability.
            transitions[i, j] = 1 + np.count_nonzero(hashed_transitions == 
                                                     transition_hash)
    # Priors of each chord = total count of pairs starting in that chord.
    priors = np.sum(transitions, axis=1)
    # Normalize each row of transitions.
    transitions /= priors[:, np.newaxis]
    # Normalize priors too.
    priors /= np.sum(priors)
    return transitions, priors

def train_chord_models(train_ids):
    """Train Gaussian models for all chord data from a list of IDs.
    
    Args:
      train_ids:  List of IDs to pass to read_beat_chroma_labels().

    Returns:
      models: a list of sklearn.mixture.GMM objects, one for each class.
      transitions: np.array of size (num_classes, num_classes). 
        transitions[i, j] is the probability of moving to state j when 
        starting in state i.
      priors: 1D np.array giving the prior probability for each class.

    2016-04-03, 2010-04-07 Dan Ellis dpwe@ee.columbia.edu
    """
    features, labels = load_all_features_labels(train_ids)
    num_chroma = 12
    # We have a major and a minor chord model for each chroma, plus NOCHORD.
    num_models = 2 * num_chroma + 1
    # Global mean/covariance used for empty models.
    global_model = sklearn.mixture.GMM(n_components=1, 
                                       covariance_type='full')
    # Train a background model on all the data, regardless of label.
    global_model.fit(features)
    # Set up individual models for all chords.
    models = []
    for model_index in range(num_models):
        # labels contains one value in the range 0..24 for each row of 
        # features.
        true_example_rows = np.nonzero(labels == model_index)
        if true_example_rows:
            model = sklearn.mixture.GMM(n_components=1, 
                                        covariance_type='full')
            model.fit(features[true_example_rows])
            models.append(model)
        else:
            # No training data for this label, so substitute the 
            # background model.
            models.append(global_model)
    
    transitions, priors = estimate_transitions(labels, num_models)
    
    return models, transitions, priors

def viterbi_path(posteriors, transitions, priors):
    """Calculate Viterbi (best-cost) path through Markov model.
    
    Args:
      posteriors: np.array sized (num_frames, num_states) giving the 
        local-match posterior probability of being in state j at time i.
      transitions: np.array of (num_states, num_states).  For each row, 
        transitions(row, col) gives the probability of transitioning from
        state row to state col.
      priors: np.array of (num_states,) giving prior probability of 
        each state.    
    """
    num_frames, num_states = posteriors.shape
    traceback = np.zeros((num_frames, num_states), dtype=int)
    # Normalized best probability-to-date for each state.
    best_prob = priors * posteriors[0]
    best_prob /= np.sum(best_prob)
    for frame in range(1, num_frames):
        # Find most likely combination of previous prob-to-path, and 
        # transition.
        possible_transition_scores = (transitions * 
                                      np.outer(best_prob, posteriors[frame]))
        # The max is found for each destination state (column), so the max
        # is over all the possible preceding states (rows).
        traceback[frame] = np.argmax(possible_transition_scores, axis=0)
        best_prob = np.max(possible_transition_scores, axis=0)
        best_prob /= np.sum(best_prob)
    # Traceback from best final state to get best path.
    path = np.zeros(num_frames, dtype=int)
    path[-1] = np.argmax(best_prob)
    for frame in range(num_frames - 1, 0, -1):
        path[frame - 1] = traceback[frame, path[frame]]
    return path

def recognize_chords(chroma, models, transitions, priors):
    """Perform chord recognition on chroma feature matrix."""
    scores = np.array([model.score(chroma) for model in models])
    chords = viterbi_path(np.exp(scores.transpose()), transitions, priors)
    return chords, scores

In [37]:
train_list_filename = os.path.join(DATA_DIR, 'trainfilelist.txt')
train_ids = read_file_list(train_list_filename)
test_list_filename = os.path.join(DATA_DIR, 'testfilelist.txt')
test_ids = read_file_list(test_list_filename)

# Run the full set of training examples through the model training.
models, transitions, priors = train_chord_models(train_ids)
# Extract the means from each class's model to illustrate.
# model_means = np.concatenate([model.means_ for model in models])
# Construct a list of names for each of the 25 classes.
all_chords = ['-', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 
              'B', 'Cm', 'C#m', 'Dm', 'D#m', 'Em', 'Fm', 'F#m', 'Gm', 'G#m', 'Am', 'A#m', 'Bm']


Training features shape: (80423, 12)


In [38]:
file_id = test_ids[0]
print(file_id)
beat_times, chroma_features, label_indices = read_beat_chroma_labels(file_id)
hyp_chords, scores = recognize_chords(chroma_features, models, transitions, priors)


beatles/Help_/13-Yesterday


In [39]:
print(hyp_chords[:20])
print(label_indices[:20])
print(hyp_chords.shape)


[1 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]
[0 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6]
(382,)


In [45]:
%matplotlib inline

# Beat tracking example
from __future__ import print_function
import numpy as np, scipy, matplotlib.pyplot as plt, librosa
from IPython.display import Audio

# Load the example clip
# y, sr = librosa.load('/home/py/projects/tmp/tmp.m4a')

y, sr = librosa.load('/home/py/projects/dataset/beatles/mp3s-32k/Help_/13-Yesterday.mp3')

In [46]:
# Separate harmonics and percussives into two waveforms
y_harmonic, y_percussive = librosa.effects.hpss(y)
# Beat track on the percussive signal
tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr)
# 4. Convert the frame indices of beat events into timestamps
beat_times = librosa.frames_to_time(beat_frames, sr=sr)
# Compute chroma features from the harmonic signal
chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
# Aggregate chroma features between beat events
# We'll use the median value of each feature between beat frames
beat_chroma = librosa.feature.sync(chromagram, beat_frames, aggregate=np.median)


  mask /= mask + (X_ref / Z)**power
  return array(a, dtype, copy=False, order=order)


In [47]:
lib_chords, scores = recognize_chords(beat_chroma.transpose(), models, transitions, priors)
print(lib_chords)
print('*****************************************')
print(label_indices)
print(lib_chords.shape)
print(label_indices.shape)

[ 0  0  6  6  6  6  6  6  6  6  6  6  6  6 17 10 10 10 15 15 15 15 11 11  1
  1  6  6  6  6  6  6 20 20 23  6  6  6  6  6  6  6  5  5 10 10 15 15 15  1
 11 11 11  6  6  6  6 17 15 20 20 20 23  6  6  6 10 10 10 10  3  1  6 22  8
  8  1  1  6  6  6  6 10 10 10 10  3  1 11 22  8  8  1  1  6  6  6  6  6  6
  6  6 17 22 22 22 17  3  3  1 11 11  1  1 11  6  6  6  6 20 20 20 23  6  6
  6 10 10 10 10  3  1 11  3  8  8  1  1  6  6  6  6 10 10 10 10  3  1 11 22
  8  8  1  1  6  6 11  6  6  6  6  6  5  5 10 10 15 15 15  6 11  1  1  1  6
  6  6  6  6  8  8  8  6  6  6  6  6  6  8 24 24 11 11]
*****************************************
[ 0  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6  6
 17 17 17 17 10 10 10 10 15 15 15 15 15 15 15 15 11 11 11 11  1  1  1  1  6
  6  6  6  6  6  6  6 15 15 15 15  8  8  8  8 11 11  6  6  6  6  6  6  6  6
  6  6  6  6  6  6 10 10 10 10 10 10 10 10 15 15 15 15 15 15 22 22 11 11 11
 11  1  1  1  1 11 11  6  6  6  6  6  6 15 15 15 15  8  8  8  8 11

In [48]:
Audio(y_harmonic, rate=sr)

In [44]:
for idx, chord_idx in enumerate(lib_chords):
    print (beat_times[idx],all_chords[chord_idx], sep=' - ')

3.1579138322 - -
3.8777324263 - -
4.66721088435 - D
5.34058956916 - D
5.94430839002 - A
6.52480725624 - A
7.22140589569 - E
7.8947845805 - E
8.6146031746 - E
9.33442176871 - E
10.0542403628 - E
10.7740589569 - E
11.493877551 - E
12.2136961451 - E
12.9335147392 - E
13.676553288 - E
14.3963718821 - E
15.1161904762 - E
15.8360090703 - G
16.579047619 - G
17.2988662132 - Am
18.0186848073 - Am
18.7385034014 - Am
19.4815419501 - Em
20.2013605442 - Em
20.9211791383 - Em
21.6177777778 - G
22.3840362812 - G
23.1038548753 - A
23.8004535147 - A
24.4970521542 - E
25.2168707483 - E
25.9366893424 - E
26.6565079365 - E
27.3763265306 - G
28.1193650794 - G
28.8391836735 - Am
29.5357823129 - Am
30.255600907 - Am
30.9754195011 - Em
31.6952380952 - Em
32.4150566893 - Em
33.1116553288 - G
33.7153741497 - G
34.3423129252 - G
34.946031746 - A
35.6426303855 - A
36.4088888889 - D
37.128707483 - D
37.8717460317 - C
38.6147845805 - C
39.3578231293 - B
40.100861678 - B
40.8671201814 - A#
41.5637188209 - A#
42.3067

IndexError: index 235 is out of bounds for axis 0 with size 235