# Notes

* `d2['file_scores'][k]['paths']` is a matrix of all-pairs segment-to-segment alignments
  * indices are relative to the segment in question
  * `d2['file_scores'][k]['scores']` holds the alignment scores for the same segment pairs
  * use `< thresholds['none']` to find the good segment pairs
* generate data from
  * isophonics functions
  * salami functions
  * l2 norm
* samples:
  * features = patches of cqt +- 5 frames on either side
  * positives = drawn from pairs of segments below threshold
  * negatives = drawn from pairs of segments above the threshold
* model evaluation
  * serra method or laplacian

In [1]:
import numpy as np
import librosa
import cPickle as pickle
import numba
import os
from joblib import Parallel, delayed

In [22]:
def symstack(X, n_steps=5, delay=1, **kwargs):
    '''Stack context from both directions onto every column of X.

    Works like librosa.feature.stack_memory, except that the stacked
    window also reaches forward in time instead of only backward.

    X is padded on the right of its second axis by ``n_steps * delay``
    columns, a ``2 * n_steps + 1``-step memory stack is built on the padded
    array, and the first ``n_steps * delay`` columns of the result are
    dropped.

    Parameters
    ----------
    X : 2-d array
        Input features (two pad widths are given, one per axis).
    n_steps : int
        Number of context steps requested on each side.
    delay : int
        Step size, in columns, passed on to ``stack_memory``.
    **kwargs
        Forwarded unchanged to BOTH ``np.pad`` and
        ``librosa.feature.stack_memory`` (e.g. ``mode='edge'``).

    Returns
    -------
    The stacked array with the leading ``n_steps * delay`` columns removed.
    '''
    lookahead = delay * n_steps

    # Extend the right edge so the stack has future columns to draw from.
    padded = np.pad(X, [(0, 0), (0, lookahead)], **kwargs)

    stacked = librosa.feature.stack_memory(padded,
                                           n_steps=1 + 2 * n_steps,
                                           delay=delay,
                                           **kwargs)

    # Discard the leading columns to compensate for the right-hand padding.
    return stacked[:, lookahead:]

In [15]:
def load_feature_data(file_name, n_steps=5):
    '''Load the pickled features for one track and stack symmetric context.

    Reads ``../data/features/<file_name>.pk``, which must unpickle to a
    mapping with the keys ``'cqgram'`` and ``'intframes'``.

    Parameters
    ----------
    file_name : str
        Base name of the feature file (without directory or ``.pk``).
    n_steps : int
        Context steps per side, passed through to ``symstack``.

    Returns
    -------
    stacked : array
        ``data['cqgram']`` cast to float32 and run through ``symstack`` with
        ``mode='edge'``.
    intframes
        ``data['intframes']``, returned untouched.
    '''
    fname = '../data/features/{}.pk'.format(file_name)

    # NOTE(security): unpickling can execute arbitrary code, so this must only
    # ever be pointed at feature files produced by a trusted pipeline.
    # Pickles are binary data: open in 'rb', and use a context manager so the
    # handle is closed even when loading fails (the old bare open() leaked it).
    with open(fname, 'rb') as fdesc:
        data = pickle.load(fdesc)

    return symstack(data['cqgram'].astype(np.float32),
                    n_steps=n_steps,
                    mode='edge'), data['intframes']

In [33]:
@numba.jit
def get_feature_paths(X_raw, t_i, t_j, n_samples, max_samples):
    '''Collect absolute column differences of X_raw along an index path.

    Walks the pairs ``(t_i[n], t_j[n])`` in order.  A pair is skipped when the
    counter of its first index has already reached ``max_samples``; otherwise
    the counters of both indices are incremented and
    ``|X_raw[:, i] - X_raw[:, j]|`` is stored as the next output row.

    NOTE(review): only the counter of the ``t_i`` index is tested against the
    cap, so a ``t_j`` index can be counted past ``max_samples`` -- confirm
    that this is intended.

    Parameters
    ----------
    X_raw : 2-d array
        Features, indexed by column.
    t_i, t_j : sequences of column indices into X_raw
    n_samples : indexable counters, one per column
        Updated IN PLACE.
    max_samples : number
        Cap tested against ``n_samples[i]``.

    Returns
    -------
    Array of shape ``(n_kept, X_raw.shape[0])`` with the dtype of X_raw.
    '''
    n_features = X_raw.shape[0]

    # Worst case every pair is kept; the unused tail is sliced off at the end.
    out = np.empty((len(t_i), n_features), dtype=X_raw.dtype)

    n_kept = 0
    for col_a, col_b in zip(t_i, t_j):
        if n_samples[col_a] >= max_samples:
            continue

        n_samples[col_a] += 1
        n_samples[col_b] += 1

        out[n_kept] = np.abs(X_raw[:, col_a] - X_raw[:, col_b])
        n_kept += 1

    return out[:n_kept]

In [37]:
def make_training_data(max_samples=2,
                       paths=None,
                       file_name=None,
                       thresholds=None,
                       labels=None,
                       intervals=None,
                       fmeasures=None,
                       scores=None):
    '''Build labeled frame-pair examples for one track.

    For every segment pair ``(seg_i, seg_j)`` with ``seg_j > seg_i`` whose
    entry in ``paths`` is a list, the pair is labeled 1 when
    ``scores[seg_i, seg_j] <= thresholds['none']`` and 0 otherwise, and
    examples are drawn along its alignment path with ``get_feature_paths``.

    Parameters
    ----------
    max_samples : int
        Per-frame cap handed to ``get_feature_paths``; usage is counted
        separately for each label.
    paths : nested sequence
        ``paths[seg_i][seg_j]`` is a two-item list ``[t_i, t_j]`` of index
        arrays relative to the start of each segment; non-list entries are
        skipped.
    file_name : str
        Track name handed to ``load_feature_data``.
    thresholds : mapping
        Only ``thresholds['none']`` is read.
    labels, intervals, fmeasures
        Not read here; accepted so a whole score record can be passed
        with ``**``.
    scores : 2-d array
        Alignment scores, indexed ``[seg_i, seg_j]``.

    Returns
    -------
    ex_x : array, shape (n_examples, n_features)
    ex_y : int32 array, shape (n_examples,)
        Both are empty (zero rows) when no segment pair yields a path.
    '''
    # intframes gives the section->cqt frame alignments
    X_raw, intframes = load_feature_data(file_name)

    thresh = thresholds['none']

    n_segments = len(paths)

    # Per-frame usage counters, one row per label (row 0: y=0, row 1: y=1).
    n_samples = np.zeros((2, X_raw.shape[1]))

    ex_x, ex_y = [], []

    for seg_i in range(n_segments):
        off_i = intframes[seg_i, 0]

        for seg_j in range(seg_i + 1, len(paths[seg_i])):
            off_j = intframes[seg_j, 0]

            if not isinstance(paths[seg_i][seg_j], list):
                continue

            if scores[seg_i, seg_j] <= thresh:
                y = 1
            else:
                y = 0

            # Pull out features from the path
            t_i, t_j = paths[seg_i][seg_j]

            # Shift segment-relative indices to whole-track frame indices.
            # Deliberately not `+=`: that would write the offsets back into
            # the arrays stored in the caller's `paths`, so a second call on
            # the same record would shift them twice.
            t_i = np.asarray(t_i) + off_i
            t_j = np.asarray(t_j) + off_j

            X = get_feature_paths(X_raw, t_i, t_j, n_samples[y], max_samples)
            ex_x.append(X)
            ex_y.append(y * np.ones(len(X), dtype=np.int32))

    if not ex_x:
        # np.concatenate rejects an empty list; hand back correctly shaped
        # empty outputs instead of failing when no pair had a usable path.
        return (np.empty((0, X_raw.shape[0]), dtype=X_raw.dtype),
                np.empty(0, dtype=np.int32))

    ex_x = np.concatenate(ex_x)
    ex_y = np.concatenate(ex_y)

    return ex_x, ex_y

In [38]:
import os
def process_file(file_scores, max_samples=2):
    '''Generate and save the labeled examples for one score record.

    Does nothing when ``file_scores['paths']`` is empty/falsy.  Otherwise the
    whole record is passed to ``make_training_data`` as keyword arguments and
    the result is written to
    ``../data/labeled_features/<basename of file_name>.npz`` under the keys
    ``X`` and ``Y``.

    Parameters
    ----------
    file_scores : dict
        One score record; its keys must match the keyword arguments of
        ``make_training_data``.
    max_samples : int
        Forwarded to ``make_training_data``.
    '''
    # Nothing to label for records without alignment paths.
    if not file_scores['paths']:
        return

    features, targets = make_training_data(max_samples=max_samples,
                                           **file_scores)

    stem = os.path.basename(file_scores['file_name'])
    np.savez('../data/labeled_features/{}.npz'.format(stem),
             X=features, Y=targets)

In [42]:
# Alternate input (SALAMI scores); swap with the path below to use it:
#all_data = pickle.load(open('../data/scores_datasetESALAMI_levelEfunction_distEL2.pk'))
# NOTE(security): unpickling can execute arbitrary code; only load trusted files.
# Binary mode + context manager: pickles are binary data, and the handle is
# closed as soon as the load finishes (the bare open() never closed it).
with open('../data/scores_datasetEIsophonics_levelEfunction_distEL2.pk', 'rb') as fdesc:
    all_data = pickle.load(fdesc)

In [44]:
# Run process_file over every record in all_data['file_scores'], 4 jobs at a time.
# The trailing semicolon is kept on purpose -- presumably it stops the notebook
# from echoing the returned list of results.
Parallel(n_jobs=4, verbose=20)(
    delayed(process_file)(entry)
    for entry in all_data['file_scores']
);

[Parallel(n_jobs=4)]: Done   1 out of 299 | elapsed:    0.8s remaining:  4.2min
[Parallel(n_jobs=4)]: Done  16 out of 299 | elapsed:    1.8s remaining:   31.8s
[Parallel(n_jobs=4)]: Done  31 out of 299 | elapsed:    2.9s remaining:   25.2s
[Parallel(n_jobs=4)]: Done  46 out of 299 | elapsed:    4.6s remaining:   25.1s
[Parallel(n_jobs=4)]: Done  61 out of 299 | elapsed:    8.7s remaining:   34.0s
[Parallel(n_jobs=4)]: Done  76 out of 299 | elapsed:    9.8s remaining:   28.9s
[Parallel(n_jobs=4)]: Done  91 out of 299 | elapsed:   11.7s remaining:   26.7s
[Parallel(n_jobs=4)]: Done 106 out of 299 | elapsed:   14.1s remaining:   25.6s
[Parallel(n_jobs=4)]: Done 121 out of 299 | elapsed:   17.1s remaining:   25.1s
[Parallel(n_jobs=4)]: Done 136 out of 299 | elapsed:   19.6s remaining:   23.5s
[Parallel(n_jobs=4)]: Done 151 out of 299 | elapsed:   22.4s remaining:   22.0s
[Parallel(n_jobs=4)]: Done 166 out of 299 | elapsed:   24.8s remaining:   19.9s
[Parallel(n_jobs=4)]: Done 181 out of 29