In [34]:
import librosa
import glob
import os
import dtw
import matplotlib.pyplot as plt
import seaborn
seaborn.set(style='dark')
%matplotlib inline

import msaf # develop branch
from msaf import jams2
import numpy as np
import scipy
import mir_eval
import scipy.spatial.distance
from joblib import Parallel, delayed
import cPickle as pickle

In [35]:
# Root directory of the segmentation datasets. The default is the Linux
# location this notebook was last run with (the macOS equivalent was
# "/Users/uri/datasets/Segments/"); set SEGMENTS_DS_PATH to point elsewhere
# without editing the notebook.
ds_path = os.environ.get("SEGMENTS_DS_PATH", "/home/uri/datasets/Segments/")

In [36]:
# Run the companion notebook so that the names it defines become available here.
# NOTE(review): make_cost_matrix(), called by compute_score() in this notebook,
# is not defined in this file and presumably comes from this %run — confirm.
# The recorded execution of this cell ended with
# "IndexError: list index out of range", so verify it completes on a fresh kernel.
%run 'Cross-repetition DTW.ipynb'



IndexError: list index out of range

In [37]:
# Run the companion notebook so that the names it defines become available here.
# NOTE(review): plot_curve(), called by compute_score() in this notebook, is not
# defined in this file and presumably comes from this %run — confirm.
%run 'Repetition detection threshold.ipynb'

## Parameters to Explore ##

Datasets (levels):

* SALAMI (large_scale, small_scale)
* Isophonics (function)

Distances for the DTW scores:

* L1
* L2
* correlation

Normalization of the DTW scores to compute the threshold and the F-measures:

* None
* Minimum
* Maximum
* Harmonic Mean

In [38]:
# Annotation levels that are explored for each dataset.
dataset_levels = dict([
    ("Isophonics", ["function"]),
    ("SALAMI", ["large_scale", "small_scale"]),
])

# Datasets, in the order in which they are processed.
datasets = ["SALAMI", "Isophonics"]

# Distance functions for the DTW scores, keyed by the short name that is also
# embedded in the output file names.
dist_dict = dict([
    ("L1", scipy.spatial.distance.cityblock),
    ("L2", scipy.spatial.distance.euclidean),
    ("correlation", scipy.spatial.distance.correlation),
])

# Normalizations applied to the DTW scores when computing thresholds and
# F-measures.
norms = ["none", "min", "max", "hmean"]

In [39]:
def compute_score(file_struct, level, dist_key):
    """Compute the DTW scores, thresholds and F-measures for a single track.

    Parameters
    ----------
    file_struct : object
        Must expose the attributes `ref_file` (passed to
        `jams2.converters.load_jams_range`) and `audio_file` (passed to
        `make_cost_matrix` and `plot_curve`).
    level : str
        Annotation level, forwarded as `context` when loading the
        "sections" annotations of annotator 0.
    dist_key : str
        Key into the module-level `dist_dict` selecting the distance function.

    Returns
    -------
    dict
        A record with the keys "intervals", "labels", "scores", "paths",
        "thresholds", "fmeasures" and "file_name". "thresholds" and
        "fmeasures" are dicts keyed by the entries of `norms`. If anything
        fails while processing the track, every value except "file_name"
        is None.
    """
    audio_file = file_struct.audio_file
    try:
        ref_inter, ref_labels = jams2.converters.load_jams_range(
            file_struct.ref_file, "sections", annotator=0, context=level)
        D, P = make_cost_matrix(audio_file, ref_inter, ref_labels,
                                dist=dist_dict[dist_key])
        thresholds = {}
        fmeasures = {}
        for norm in norms:
            thresholds[norm], fmeasures[norm] = plot_curve(
                file_name=audio_file, intervals=ref_inter, labels=ref_labels,
                scores=D, norm=norm)
    except Exception as exc:
        # Best effort: one bad track must not abort the whole batch. Catch
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still
        # stop the run, and report the real cause instead of assuming that
        # the annotations are missing.
        print("warning: could not process file %s (%s: %s)"
              % (audio_file, type(exc).__name__, exc))
        ref_inter = None
        ref_labels = None
        D = None
        P = None
        thresholds = None
        fmeasures = None
    return {
        "intervals": ref_inter,
        "labels": ref_labels,
        "scores": D,
        "paths": P,
        "thresholds": thresholds,
        "fmeasures": fmeasures,
        "file_name": os.path.basename(audio_file)
    }

In [42]:
# Number of parallel jobs handed to joblib.
n_jobs = 6

# Score every (dataset, level, distance) combination and pickle one result
# file per combination in the current working directory.
for dataset in datasets:
    files = msaf.io.get_dataset_files(ds_path, ds_name=dataset)
    # Isophonics is excluded from this run.
    if dataset == "Isophonics":
        continue
    for level in dataset_levels[dataset]:
        for dist_key in dist_dict:
            # This combination is excluded from this run.
            if dataset == "SALAMI" and level == "small_scale" and dist_key == "L1":
                continue
            print("Computing:  %s %s %s" % (dataset, level, dist_key))
            scores = Parallel(n_jobs=n_jobs)(
                delayed(compute_score)(file_struct, level, dist_key)
                for file_struct in files)
            result = {
                "level": level,
                "dist": dist_key,
                "file_scores": scores
            }
            out_file = "scores_datasetE%s_levelE%s_distE%s.pk" % (dataset, level, dist_key)
            # protocol=-1 selects a binary pickle protocol, so the file must
            # be opened in binary mode.
            with open(out_file, "wb") as f:
                pickle.dump(result, f, protocol=-1)

Computing:  SALAMI large_scale correlation
Computing:  SALAMI large_scale L2
 /home/uri/datasets/Segments/audio/SALAMI_1030.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1178.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1410.mp3
 /home/uri/datasets/Segments/audio/SALAMI_794.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1466.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1320.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1430.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1398.mp3
 /home/uri/datasets/Segments/audio/SALAMI_724.mp3
 /home/uri/datasets/Segments/audio/SALAMI_742.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1040.mp3
 /home/uri/datasets/Segments/audio/SALAMI_964.mp3
 /home/uri/datasets/Segments/audio/SALAMI_716.mp3
 /home/uri/datasets/Segments/audio/SALAMI_918.mp3
 /home/uri/datasets/Segments/audio/SALAMI_138.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1500.mp3
 /home/uri/datasets/Segments/audio/SALAMI_710.mp3


  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or


Computing:  SALAMI large_scale L1
 /home/uri/datasets/Segments/audio/SALAMI_1030.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1320.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1466.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1486.mp3
 /home/uri/datasets/Segments/audio/SALAMI_138.mp3
 /home/uri/datasets/Segments/audio/SALAMI_964.mp3
 /home/uri/datasets/Segments/audio/SALAMI_710.mp3
 /home/uri/datasets/Segments/audio/SALAMI_724.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1398.mp3
 /home/uri/datasets/Segments/audio/SALAMI_918.mp3
 /home/uri/datasets/Segments/audio/SALAMI_308.mp3
 /home/uri/datasets/Segments/audio/SALAMI_714.mp3
 /home/uri/datasets/Segments/audio/SALAMI_720.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1500.mp3


  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or


 /home/uri/datasets/Segments/audio/SALAMI_1486.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1410.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1430.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1440.mp3
 /home/uri/datasets/Segments/audio/SALAMI_714.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1320.mp3
 /home/uri/datasets/Segments/audio/SALAMI_964.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1052.mp3
 /home/uri/datasets/Segments/audio/SALAMI_308.mp3
 /home/uri/datasets/Segments/audio/SALAMI_720.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1398.mp3
 /home/uri/datasets/Segments/audio/SALAMI_716.mp3
 correlation


  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or


Computing:  SALAMI small_scale L2
 /home/uri/datasets/Segments/audio/SALAMI_1486.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1178.mp3
 /home/uri/datasets/Segments/audio/SALAMI_964.mp3
 /home/uri/datasets/Segments/audio/SALAMI_348.mp3
 /home/uri/datasets/Segments/audio/SALAMI_724.mp3
 /home/uri/datasets/Segments/audio/SALAMI_308.mp3
 /home/uri/datasets/Segments/audio/SALAMI_558.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1126.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1466.mp3
 /home/uri/datasets/Segments/audio/SALAMI_720.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1500.mp3
 /home/uri/datasets/Segments/audio/SALAMI_68.mp3
 /home/uri/datasets/Segments/audio/SALAMI_714.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1040.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1052.mp3
 /home/uri/datasets/Segments/audio/SALAMI_710.mp3
 /home/uri/datasets/Segments/audio/SALAMI_404.mp3
 /home/uri/datasets/Segments/audio/SALAMI_918.mp3


  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or


 /home/uri/datasets/Segments/audio/SALAMI_1178.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1486.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1398.mp3
 /home/uri/datasets/Segments/audio/SALAMI_964.mp3
 /home/uri/datasets/Segments/audio/SALAMI_558.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1320.mp3
 /home/uri/datasets/Segments/audio/SALAMI_724.mp3
 /home/uri/datasets/Segments/audio/SALAMI_720.mp3
 /home/uri/datasets/Segments/audio/SALAMI_918.mp3
 /home/uri/datasets/Segments/audio/SALAMI_308.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1126.mp3
 /home/uri/datasets/Segments/audio/SALAMI_714.mp3
 /home/uri/datasets/Segments/audio/SALAMI_1466.mp3
 /home/uri/datasets/Segments/audio/SALAMI_68.mp3
 /home/uri/datasets/Segments/audio/SALAMI_710.mp3


  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  not (np.all(classes == [0, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or
  np.all(classes == [-1, 1]) or


In [27]:
# Remove redundant data
# The score files are written with a binary pickle protocol, so they must be
# read back in binary mode. Only unpickle files produced by this notebook:
# pickle.load can execute arbitrary code from an untrusted file.
with open("scores_datasetESALAMI_levelElarge_scale_distEL1.pk", "rb") as f:
    data = pickle.load(f)

In [41]:
def _compact_entry(entry):
    """Shrink one per-file result in place and return it.

    The "scores" value is cast to float32. For every pair (i, j) of labels,
    the "paths" cell is replaced by 0 when j <= i, and otherwise, when the
    cell holds exactly two items, both items are cast to int32 arrays.
    Entries of files that could not be scored (all values None) are
    returned unchanged.
    """
    if entry["scores"] is not None:
        # Guarded so that a failed file keeps its None marker instead of
        # being turned into a 0-d NaN array.
        entry["scores"] = np.asarray(entry["scores"], dtype=np.float32)
    if entry["labels"] is None:
        return entry

    paths = entry["paths"]
    n_labels = len(entry["labels"])
    for i in range(n_labels):
        for j in range(n_labels):
            if j <= i:
                # Remove lower triangle (diagonal included)
                paths[i][j] = 0
                continue
            # Convert to int32
            try:
                pair = paths[i][j]
                if len(pair) == 2:
                    pair[0] = np.asarray(pair[0], dtype=np.int32)
                    pair[1] = np.asarray(pair[1], dtype=np.int32)
            except (TypeError, ValueError, IndexError):
                # Best effort: cells that are not a mutable, convertible pair
                # (e.g. a scalar, or an immutable tuple) are left as they are.
                pass
    return entry


# Only this file is converted. A glob over "*.pk" would also match the
# "conv_" files written below.
pk_files = ["scores_datasetESALAMI_levelEsmall_scale_distEL1.pk"]
for pk_file in pk_files:
    # Binary pickles must be read and written in binary mode. Only unpickle
    # files produced by this notebook (pickle.load can run arbitrary code).
    with open(pk_file, "rb") as f:
        data = pickle.load(f)

    data["file_scores"] = [_compact_entry(entry) for entry in data["file_scores"]]

    with open("conv_" + pk_file, "wb") as f:
        # pickle.dump returns None, so its result must not be bound to `data`.
        pickle.dump(data, f, protocol=-1)

In [18]:
# Scratch cell: build a small int32 array (the name is not used again in this
# notebook) ...
caca = np.asarray([1,2,3,4], dtype=np.int32)
# ... and display the dtype of the first file's "scores" entry. `data` must
# have been loaded by an earlier cell; the recorded output is float64.
data["file_scores"][0]["scores"].dtype

dtype('float64')