In [136]:
import glob
import os
import sys
import jams2
import numpy as np
import pandas as pd
from scipy.spatial import distance
import add_bounds as AB

sys.path.append("..")
import msaf_io as MSAF
import eval2 as EV
import mir_eval

# Main ds folder. Defaults to the original author's local path; set the
# SEGMENTS_DS_PATH environment variable to point at another copy of the
# dataset without editing the notebook.
ds_path = os.environ.get("SEGMENTS_DS_PATH", "/Users/uri/datasets/Segments")

In [38]:
# Obtain all the annotated boundaries

# Get all annotations
# NOTE(review): glob.glob returns paths in arbitrary, filesystem-dependent
# order, and a later cell skips excerpts by position (the "bad" index list).
# Confirm the ordering before re-running on another machine.
jam_files = glob.glob(os.path.join(ds_path, "annotations", "*.jams"))

# Read all boundaries
all_boundaries = []  # one entry of boundary times per successfully loaded file
all_files = []       # annotation paths, parallel to all_boundaries
for jam_file in jam_files:
    # The dataset prefix is the part of the file name before the first "_"
    ds_prefix = os.path.basename(jam_file).split("_")[0]
    try:
        ann_inter, ann_labels = jams2.converters.load_jams_range(
            jam_file, "sections", annotator=0, context=MSAF.prefix_dict[ds_prefix])
    except Exception:
        # Only catch real errors: a bare "except:" would also swallow
        # KeyboardInterrupt / SystemExit and make the loop impossible to stop.
        print("Warning: no annotations for file %s" % jam_file)
        continue
    ann_times = EV.intervals_to_times(ann_inter)
    all_boundaries.append(ann_times)
    all_files.append(jam_file)



In [137]:
# Obtain all the excerpts that contain at least "nbounds" annotated boundaries within "th" seconds

th = 80       # maximum time span of an excerpt
nbounds = 8   # minimum number of boundaries inside that span

B = nbounds - 1  # index distance between the first and last boundary of a window
b_offset = 2     # number of leading boundaries that are skipped

excerpts = []  # List of tuples (audio_file, boundaries, start_bound_idx)
for or_bounds, jam_file in zip(all_boundaries, all_files):
    bounds = or_bounds[b_offset:-1]  # Remove first offset and last boundary
    # Index with range() instead of slicing bounds[:-B]: that slice is empty
    # when B == 0, which would silently discard every track.
    for i in range(len(bounds) - B):
        bound = bounds[i]
        if th >= bounds[i + B] - bound:
            # The audio sits in the "audio" folder next to the annotation
            # folder, under the same base name. Build the path from its
            # components: str.replace would also rewrite "annotations",
            # "jams" or "mp3" appearing anywhere else in the path.
            ann_dir, jam_name = os.path.split(jam_file)
            audio_dir = os.path.join(os.path.dirname(ann_dir), "audio")
            track_name = os.path.splitext(jam_name)[0]
            audio_file = os.path.join(audio_dir, track_name + ".mp3")
            if not os.path.isfile(audio_file):
                # Fall back to a wav file when there is no mp3
                audio_file = os.path.join(audio_dir, track_name + ".wav")
            excerpts.append((audio_file, or_bounds, i + b_offset))
            break  # keep only the first qualifying window per track

In [139]:
# Save excerpts to disk
tmp_file = "tmpo.wav"  # scratch file: written by AB.mp32wav, then read by AB.add_boundaries
out_folder = "www/audio/"  # folder the rendered excerpt versions are written into
t_offset = 5  # padding subtracted from the excerpt start and added to its end
reload(AB)  # re-import add_bounds so that edits to the module are picked up

def get_extra_bounds(bounds, N=1):
    """Adds N extra boundaries, each at the midpoint of the longest gap.

    The longest gap is recomputed after every insertion, so a boundary that
    was just added can itself delimit the next gap to be split. Ties are
    resolved in favour of the earliest gap (np.argmax behaviour).

    Parameters
    ----------
    bounds : sequence of numbers
        Boundary times. The input is never modified.
    N : int
        Number of boundaries to add.

    Returns
    -------
    list
        A new list with the extra boundaries inserted. Inputs with fewer
        than two boundaries have no gap to split and are returned unchanged
        (as a list).
    """
    # TODO: Align to downbeat
    bounds = list(bounds)  # work on a copy so the caller's data is untouched
    for _ in range(N):
        if len(bounds) < 2:
            break  # no gap left to split; np.argmax would fail on an empty diff
        idx = int(np.argmax(np.diff(bounds)))
        extra_bound = (bounds[idx + 1] - bounds[idx]) / 2.0 + bounds[idx]
        bounds.insert(idx + 1, extra_bound)
    return bounds

def get_less_bounds(bounds, N=1):
    """Removes N boundaries, each time the one that closes the shortest gap.

    The shortest gap is recomputed after every removal. Of the two
    boundaries delimiting that gap, the later one is dropped. Ties are
    resolved in favour of the earliest gap (np.argmin behaviour).

    Parameters
    ----------
    bounds : sequence of numbers
        Boundary times. The input is never modified.
    N : int
        Number of boundaries to remove.

    Returns
    -------
    list
        A new list without the removed boundaries. Removal stops early once
        fewer than two boundaries remain.
    """
    bounds = list(bounds)  # work on a copy so the caller's data is untouched
    for _ in range(N):
        if len(bounds) < 2:
            break  # no gap left; np.argmin would fail on an empty diff
        idx = int(np.argmin(np.diff(bounds)))
        bounds.pop(idx + 1)
    return bounds

def get_excerpt_bounds(excerpt, max_len=None):
    """Gets the boundaries that fall inside an excerpt.

    Parameters
    ----------
    excerpt : tuple
        (audio_file, boundaries, start_bound_idx); only the last two items
        are read here.
    max_len : number or None
        Maximum distance from the starting boundary for a boundary to be
        kept. When None, the notebook-level ``th`` is used.

    Returns
    -------
    list
        The starting boundary plus every later boundary at most ``max_len``
        away from it.
    """
    if max_len is None:
        max_len = th  # fall back to the notebook-wide excerpt length
    bounds = excerpt[1]
    start_idx = excerpt[2]
    start_time = bounds[start_idx]
    # Inclusive comparison, matching the "th >= ..." test used when the
    # excerpts are selected: a boundary exactly max_len away qualified the
    # excerpt, so it must not be dropped here.
    return [b for b in bounds[start_idx:] if b - start_time <= max_len]

def eval_bounds(synth_bounds, ann_bounds):
    """Scores the synthesized boundaries against the annotated ones.

    Both boundary lists are turned into intervals with
    EV.times_to_intervals and handed to mir_eval.boundary.detection
    (annotation first, window=3, trim=False).

    Returns the three resulting scores reordered as (F, P, R).
    """
    estimated = EV.times_to_intervals(synth_bounds)
    reference = EV.times_to_intervals(ann_bounds)
    scores = mir_eval.boundary.detection(reference, estimated,
                                         window=3, trim=False)
    # mir_eval's result is unpacked as (P, R, F); callers expect F first
    precision, recall, f_measure = scores
    return f_measure, precision, recall

# One row per rendered excerpt; dumped to "excerpts.csv" once the loop is done
excerpt_ds = pd.DataFrame()

# Bad tracks (positions in "excerpts" that are skipped)
bad = [1, 18, 19] + range(21, 26) + [28] + range(33, 40) + [44, 58, 59]

k = 0  # running id of the kept excerpts; also prefixes the output file names
for i, excerpt in enumerate(excerpts):

    # Ignore bad tracks
    if i in bad:
        continue

    ds_entry = {"id": k, "track_id": os.path.basename(excerpt[0])}
    excerpt_bounds = get_excerpt_bounds(excerpt)

    # Altered versions: N boundaries added (v1) and N boundaries removed (v3)
    N = 2
    extra_bounds = get_extra_bounds(excerpt_bounds, N=N)
    less_bounds = get_less_bounds(excerpt_bounds, N=N)

    # Score both altered versions against the annotated boundaries
    v1_scores = eval_bounds(extra_bounds, excerpt_bounds)
    ds_entry.update(zip(("v1F", "v1P", "v1R"), v1_scores))
    v3_scores = eval_bounds(less_bounds, excerpt_bounds)
    ds_entry.update(zip(("v3F", "v3P", "v3R"), v3_scores))

    excerpt_ds = excerpt_ds.append(ds_entry, ignore_index=True)

    # Excerpt window: from t_offset before the starting boundary
    # to t_offset after "th" has elapsed
    start_bound = excerpt[1][excerpt[2]]
    start_time = start_bound - t_offset
    end_time = start_bound + th + t_offset

    # Create temporary file
    AB.mp32wav(excerpt[0], tmp_file)

    # Create all versions of boundaries per excerpt:
    # v1 = extra boundaries, v2 = annotated boundaries, v3 = fewer boundaries
    versions = (("%d_v1.wav", extra_bounds),
                ("%d_v2.wav", excerpt_bounds),
                ("%d_v3.wav", less_bounds))
    for name_fmt, version_bounds in versions:
        AB.add_boundaries(tmp_file, version_bounds,
                          output=os.path.join(out_folder, name_fmt % k),
                          start=start_time, end=end_time)

    k += 1
excerpt_ds.to_csv("excerpts.csv", sep='\t', encoding='utf-8')

Wrote www/audio/0_v1.mp3
Wrote www/audio/0_v2.mp3
Wrote www/audio/0_v3.mp3
Wrote www/audio/1_v1.mp3
Wrote www/audio/1_v2.mp3
Wrote www/audio/1_v3.mp3
Wrote www/audio/2_v1.mp3
Wrote www/audio/2_v2.mp3
Wrote www/audio/2_v3.mp3
Wrote www/audio/3_v1.mp3
Wrote www/audio/3_v2.mp3
Wrote www/audio/3_v3.mp3
Wrote www/audio/4_v1.mp3
Wrote www/audio/4_v2.mp3
Wrote www/audio/4_v3.mp3
Wrote www/audio/5_v1.mp3
Wrote www/audio/5_v2.mp3
Wrote www/audio/5_v3.mp3
Wrote www/audio/6_v1.mp3
Wrote www/audio/6_v2.mp3
Wrote www/audio/6_v3.mp3
Wrote www/audio/7_v1.mp3
Wrote www/audio/7_v2.mp3
Wrote www/audio/7_v3.mp3
Wrote www/audio/8_v1.mp3
Wrote www/audio/8_v2.mp3
Wrote www/audio/8_v3.mp3
Wrote www/audio/9_v1.mp3
Wrote www/audio/9_v2.mp3
Wrote www/audio/9_v3.mp3
Wrote www/audio/10_v1.mp3
Wrote www/audio/10_v2.mp3
Wrote www/audio/10_v3.mp3
Wrote www/audio/11_v1.mp3
Wrote www/audio/11_v2.mp3
Wrote www/audio/11_v3.mp3
Wrote www/audio/12_v1.mp3
Wrote www/audio/12_v2.mp3
Wrote www/audio/12_v3.mp3
Wrote www/audio/