# Notes

* `d2['file_scores'][k]['paths']` is a matrix of all-pairs segment-to-segment alignments
  * indices are relative to the segment in question
  * likewise, `d2['file_scores'][k]['scores']` holds the alignment score for each segment pair
  * use `< thresholds['none']` to find the good segment pairs
* generate data from
  * isophonics functions
  * salami functions
  * l2 norm
* samples:
  * features = patches of cqt, ±5 frames on either side
  * positives = drawn from pairs of segments below the threshold
  * negatives = drawn from pairs of segments above the threshold
* model evaluation
  * serra method or laplacian

In [1]:
import numpy as np
import librosa
import cPickle as pickle
import numba
import os
from joblib import Parallel, delayed

In [2]:
def symstack(X, n_steps=3, delay=1, **kwargs):
    '''Stack each frame with context from both the past and the future.

    Works like librosa.feature.stack_memory, except that the window is
    shifted so that it also covers frames *after* the current one.

    Parameters
    ----------
    X : 2-d array
        Feature matrix; only the end of its second axis is padded.
    n_steps : int
        Number of delay steps taken on each side of the current frame.
    delay : int
        Spacing between stacked copies, forwarded to stack_memory.
    kwargs
        Passed unchanged to both np.pad and librosa.feature.stack_memory
        (for example mode='edge').

    Returns
    -------
    The output of stack_memory with its first n_steps * delay columns
    removed.
    '''
    lookahead = n_steps * delay

    # Extend only the trailing edge of the second axis so that "future"
    # frames exist for the last columns.
    padded = np.pad(X, [(0, 0), (0, lookahead)], **kwargs)

    # 2 * n_steps + 1 copies: n_steps on either side plus the frame itself.
    stacked = librosa.feature.stack_memory(padded,
                                           n_steps=2 * n_steps + 1,
                                           delay=delay,
                                           **kwargs)

    # Discard the leading columns introduced by the shift.
    return stacked[:, lookahead:]

In [20]:
def load_feature_data(file_name, n_steps=5):
    '''Load one pickled feature file and context-stack its CQT.

    Parameters
    ----------
    file_name : str
        Base name substituted into '../data/features/{}.pk'.
    n_steps : int
        Number of context steps on each side, forwarded to symstack.

    Returns
    -------
    (stacked, intframes) : tuple
        stacked is data['cqgram'] cast to float32 and passed through
        symstack with mode='edge'; intframes is data['intframes'] as stored
        in the pickle.
    '''
    fname = '../data/features/{}.pk'.format(file_name)

    # Pickles are byte streams: read in binary mode, and let the with-block
    # close the handle instead of leaking it.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(fname, 'rb') as fdesc:
        data = pickle.load(fdesc)

    return symstack(data['cqgram'].astype(np.float32),
                    n_steps=n_steps,
                    mode='edge'), data['intframes']

In [21]:
@numba.jit
def get_feature_paths(X_raw, t_i, t_j, n_samples, max_samples):
    '''Collect |X_raw[:, i] - X_raw[:, j]| for frame pairs along a path.

    Pairs (i, j) are read in lockstep from t_i and t_j.  A pair is skipped
    when n_samples[i] has already reached max_samples; otherwise both
    n_samples[i] and n_samples[j] are incremented in place and the absolute
    difference of the two columns becomes one output row.

    Returns only the rows that were filled, one per accepted pair, each of
    length X_raw.shape[0].
    '''
    n_features = X_raw.shape[0]

    # Allocate for the worst case (every pair accepted); trimmed on return.
    out = np.empty((len(t_i), n_features), dtype=X_raw.dtype)

    n_kept = 0
    for i, j in zip(t_i, t_j):
        # Only the counter of the first frame gates the pair.
        if n_samples[i] >= max_samples:
            continue

        n_samples[i] += 1
        n_samples[j] += 1

        diff = X_raw[:, i] - X_raw[:, j]
        out[n_kept] = np.abs(diff)
        n_kept += 1

    return out[:n_kept]

In [22]:
def make_training_data(max_samples=2,
                       paths=None,
                       file_name=None,
                       thresholds=None,
                       labels=None,
                       intervals=None,
                       fmeasures=None,
                       scores=None):
    '''Build labelled feature-difference examples from aligned segment pairs.

    Parameters
    ----------
    max_samples : int
        Per-frame cap forwarded to get_feature_paths.
    paths : sequence of sequences
        paths[i][j] is a list holding the two index arrays of the alignment
        between segments i and j; entries that are not lists are skipped.
        Indices are relative to the start of each segment.
    file_name : str
        Feature file to load via load_feature_data.
    thresholds : dict
        Only thresholds['none'] is read.
    labels, intervals, fmeasures
        Accepted and ignored, so that a whole file_scores entry can be
        passed as keyword arguments.
    scores : 2-d array
        scores[i, j] <= thresholds['none'] marks the pair as positive (1),
        anything else as negative (0).

    Returns
    -------
    (ex_x, ex_y) : tuple of arrays
        ex_x has one row of absolute feature differences per example and
        ex_y the matching int32 labels.  Both are empty (zero rows) when no
        segment pair has a path.
    '''
    # intframes gives the section->cqt frame alignments
    X_raw, intframes = load_feature_data(file_name)

    thresh = thresholds['none']

    # One row of per-frame usage counters per label: row 0 for negatives,
    # row 1 for positives.  get_feature_paths updates the row in place.
    n_samples = np.zeros((2, X_raw.shape[1]))

    ex_x, ex_y = [], []

    for seg_i, row in enumerate(paths):
        off_i = intframes[seg_i, 0]

        # Only the upper triangle: each unordered pair is visited once.
        for seg_j in range(seg_i + 1, len(row)):
            off_j = intframes[seg_j, 0]

            path = row[seg_j]
            if not isinstance(path, list):
                continue

            y = 1 if scores[seg_i, seg_j] <= thresh else 0

            # Pull out features from the path.  The offsets are added
            # out-of-place: the index arrays belong to the caller's `paths`,
            # and an in-place += would shift them again on every call.
            t_i, t_j = path
            t_i = t_i + off_i
            t_j = t_j + off_j

            X = get_feature_paths(X_raw, t_i, t_j, n_samples[y], max_samples)
            ex_x.append(X)
            ex_y.append(y * np.ones(len(X), dtype=np.int32))

    # np.concatenate rejects an empty list, so return empty arrays instead.
    if not ex_x:
        return (np.empty((0, X_raw.shape[0]), dtype=X_raw.dtype),
                np.empty(0, dtype=np.int32))

    return np.concatenate(ex_x), np.concatenate(ex_y)

In [28]:
import os
def process_file(file_scores, max_samples=4):
    '''Generate and save the training examples for one file_scores entry.

    Nothing happens when file_scores['paths'] is falsy.  Otherwise the entry
    is unpacked as keyword arguments to make_training_data and the returned
    arrays are saved under the keys X and Y in
    '../data/labeled_features/<basename of file_name>_beats.npz'.
    '''
    if not file_scores['paths']:
        return

    X, Y = make_training_data(max_samples=max_samples,
                              **file_scores)

    # Name the output after the input file, without its directory part.
    base = os.path.basename(file_scores['file_name'])
    outfile = '../data/labeled_features/{}_beats.npz'.format(base)
    np.savez(outfile, X=X, Y=Y)

In [31]:
# Alternative score files; swap the path below to process another dataset.
#all_data = pickle.load(open('../data/scores_datasetESALAMI_levelEfunction_distEL2.pk'))
#all_data = pickle.load(open('../data/scores_datasetEIsophonics_levelEfunction_distEL2.pk'))
#all_data = pickle.load(open('../data/scores_datasetEIsophonics_levelEfunction_distEL2_beats.pk'))

# Read the pickle in binary mode and close the handle as soon as it is loaded.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open('../data/scores_datasetESALAMI_levelEfunction_distEL2_beats.pk', 'rb') as fdesc:
    all_data = pickle.load(fdesc)

In [32]:
# Run process_file once per entry of all_data['file_scores'], four jobs at a
# time.  The trailing semicolon keeps the notebook from echoing the result.
file_jobs = (delayed(process_file)(entry) for entry in all_data['file_scores'])
Parallel(n_jobs=4, verbose=20)(file_jobs);

[Parallel(n_jobs=4)]: Done   1 out of 769 | elapsed:    0.5s remaining:  5.8min
[Parallel(n_jobs=4)]: Done  30 out of 769 | elapsed:    1.3s remaining:   31.2s
[Parallel(n_jobs=4)]: Done  69 out of 769 | elapsed:    2.2s remaining:   22.5s
[Parallel(n_jobs=4)]: Done 108 out of 769 | elapsed:    3.6s remaining:   21.9s
[Parallel(n_jobs=4)]: Done 147 out of 769 | elapsed:    5.2s remaining:   22.2s
[Parallel(n_jobs=4)]: Done 186 out of 769 | elapsed:    7.5s remaining:   23.4s
[Parallel(n_jobs=4)]: Done 225 out of 769 | elapsed:    8.4s remaining:   20.4s
[Parallel(n_jobs=4)]: Done 264 out of 769 | elapsed:   11.0s remaining:   21.0s
[Parallel(n_jobs=4)]: Done 303 out of 769 | elapsed:   12.4s remaining:   19.1s
[Parallel(n_jobs=4)]: Done 342 out of 769 | elapsed:   14.9s remaining:   18.6s
[Parallel(n_jobs=4)]: Done 381 out of 769 | elapsed:   17.6s remaining:   17.9s
[Parallel(n_jobs=4)]: Done 420 out of 769 | elapsed:   19.7s remaining:   16.3s
[Parallel(n_jobs=4)]: Done 459 out of 76