# Differences in Hierarchical Annotations

Let's investigate the SALAMI annotations first. We will compare:

* Normalized conditional entropy (NCE) between the upper-case and lower-case segmentation layers of each annotator, for each track

In [53]:
from collections import namedtuple
import glob
import jams
import mir_eval
import numpy as np
import os
import pandas as pd

# A segmentation is a pair: the segment intervals and the label of each segment
Segmentation = namedtuple("Segmentation", "inters labels")

In [64]:
# Paths of all SALAMI JAMS files. The location is a hard-coded absolute path;
# glob.glob returns an empty list (no error) when nothing matches.
salami_jam_files = glob.glob("/home/uri/Projects/jams-data-salami/datasets/SALAMI/*.jams")
# Paths of all SPAM reference JAMS files (same caveat as above).
spam_jam_files = glob.glob("/home/uri/Projects/msaf-data/SPAM/references/*.jams")

In [66]:
def compute_metrics(upper, lower):
    """Summarizes the two segmentation layers produced by a single annotator.

    Parameters
    ----------
    upper: Segmentation
        The upper segmentation of the annotator (``inters`` and ``labels``).
    lower: Segmentation
        The lower segmentation of the annotator (``inters`` and ``labels``).

    Returns
    -------
    res: dict
        ``S_o``, ``S_u`` and ``S_f`` hold the three values returned by
        ``mir_eval.segment.nce`` when called with the upper layer first and
        the lower layer second; all three are ``None`` if that call raises
        ``ValueError``.  For each layer (key prefix ``upper_`` / ``lower_``)
        the dictionary also contains:
        - ``nsegs``: number of segments
        - ``nunique_labels``: number of unique labels
        - ``mean_seg_dur``: mean segment length
        - ``nsegs_per_label``: dict mapping each unique label to the number
          of segments carrying it
    """
    def _layer_summary(layer, prefix):
        """Returns the single-layer metrics, with keys starting with `prefix`"""
        label_arr = np.asarray(layer.labels)
        distinct_labels = np.unique(layer.labels)
        durations = [seg[1] - seg[0] for seg in layer.inters]

        summary = {}
        summary[prefix + "nsegs"] = len(layer.labels)
        summary[prefix + "nunique_labels"] = len(distinct_labels)
        summary[prefix + "mean_seg_dur"] = np.mean(durations)
        summary[prefix + "nsegs_per_label"] = {
            label: int(np.count_nonzero(label_arr == label))
            for label in distinct_labels
        }
        return summary

    res = {}

    # NCE between the two layers; the unpacking stays inside the try so that
    # any ValueError raised there is also turned into missing scores
    try:
        s_over, s_under, s_f = mir_eval.segment.nce(
            upper.inters, upper.labels, lower.inters, lower.labels)
    except ValueError:
        s_over = s_under = s_f = None
    res["S_o"] = s_over
    res["S_u"] = s_under
    res["S_f"] = s_f

    # Per-layer statistics, upper layer first
    res.update(_layer_summary(upper, "upper_"))
    res.update(_layer_summary(lower, "lower_"))

    return res

def process_jam(jam_file):
    """Computes the per-annotator metrics of a single JAMS file.

    The ``segment_salami_upper`` and ``segment_salami_lower`` annotations of
    the file are paired by position, and every pair is compared with
    ``compute_metrics``.

    Parameters
    ----------
    jam_file: str
        Path to the JAMS file to process.

    Returns
    -------
    all_res: list of dict
        One ``compute_metrics`` result per annotation pair, extended with the
        keys ``annotator_name``, ``track_dur`` and ``track_name``.
    """
    jam = jams.load(jam_file)
    upper_anns = jam.search(namespace="segment_salami_upper")
    lower_anns = jam.search(namespace="segment_salami_lower")

    all_res = []
    # NOTE(review): zip stops at the shorter of the two sequences, so an
    # annotation without a counterpart in the other layer is silently ignored.
    for upper_ann, lower_ann in zip(upper_anns, lower_anns):
        # Pairing by position only makes sense if both layers come from the
        # same annotator
        annotator = upper_ann.annotation_metadata.annotator.name
        assert annotator == lower_ann.annotation_metadata.annotator.name

        # Intervals and labels of each layer, then all the metrics
        upper_seg = Segmentation(*upper_ann.data.to_interval_values())
        lower_seg = Segmentation(*lower_ann.data.to_interval_values())
        res = compute_metrics(upper_seg, lower_seg)

        # Identify the row: who annotated which track
        res["annotator_name"] = annotator
        res["track_dur"] = jam.file_metadata.duration
        # Drop the last five characters (the ".jams" extension) of the file name
        res["track_name"] = os.path.basename(jam_file)[:-5]
        all_res.append(res)
    return all_res

In [62]:
def compute_all_jams(jam_files):
    """Processes a collection of JAMS files into a single table of metrics.

    Parameters
    ----------
    jam_files: iterable of str
        Paths of the JAMS files; each one is passed to ``process_jam``.

    Returns
    -------
    all_df: pandas.DataFrame
        One row per result dictionary returned by ``process_jam``, with the
        columns in a fixed order.  When no rows are produced (e.g.
        ``jam_files`` is empty) an empty frame with the same columns is
        returned.
    """
    columns = ["track_name", "track_dur", "annotator_name",
               u'S_f', u'S_o', u'S_u',
               u'lower_mean_seg_dur', u'lower_nsegs',
               u'lower_nsegs_per_label', u'lower_nunique_labels',
               u'upper_mean_seg_dur', u'upper_nsegs',
               u'upper_nsegs_per_label', u'upper_nunique_labels']

    all_data = []
    for jam_file in jam_files:
        all_data.extend(process_jam(jam_file))

    # Give the column order to the constructor instead of selecting the
    # columns afterwards: selecting from a frame built from an empty list
    # raises KeyError because that frame has no columns at all.
    return pd.DataFrame(all_data, columns=columns)

In [69]:
# Compute all the salami metrics and save them as a tab-separated file.
# NOTE: with its default arguments, to_csv also writes the DataFrame index as
# a leading, unnamed column.
salami_df = compute_all_jams(salami_jam_files)
salami_df.to_csv("../data/salami_annotator_metrics.tsv", sep="\t")

In [70]:
# Compute all the spam metrics and save them as a tab-separated file.
# NOTE: with its default arguments, to_csv also writes the DataFrame index as
# a leading, unnamed column.
spam_df = compute_all_jams(spam_jam_files)
spam_df.to_csv("../data/spam_annotator_metrics.tsv", sep="\t")

In [68]:
# Display the SPAM metrics table
spam_df

Unnamed: 0,track_name,track_dur,annotator_name,S_f,S_o,S_u,lower_mean_seg_dur,lower_nsegs,lower_nsegs_per_label,lower_nunique_labels,upper_mean_seg_dur,upper_nsegs,upper_nsegs_per_label,upper_nunique_labels
0,SALAMI_876,256.026122,Colin Hua,0.888079,0.798690,1.000000,21.335510,12,"{u'a': 1, u'yyyyy': 1, u'c': 1, u'b': 2, u'd':...",10,36.575160,7,"{u'A': 1, u'YYYYY': 1, u'C': 1, u'B': 2, u'D':...",6
1,SALAMI_876,256.026122,Eleni Vasilia Maltas,0.724203,0.574594,0.979149,5.565785,46,"{u'zzzzz': 1, u'd'': 1, u'm'': 1, u'end': 1, u...",22,25.602612,10,"{u'A': 2, u'C': 2, u'B': 3, u'END': 1, u'ZZZZZ...",6
2,SALAMI_876,256.026122,John Turner,0.703684,0.585289,0.882125,14.223673,18,"{u'a': 1, u'c': 2, u'b': 6, u'e': 1, u'd': 2, ...",8,51.205224,5,"{u'A': 2, u'B': 2, u'ZZZZZ': 1}",3
3,SALAMI_876,256.026122,Evan S. Johnson,0.807968,0.765342,0.855623,12.801306,20,"{u'a': 5, u'c': 2, u'b': 6, u'd': 1, u'b''': 2...",8,42.671020,6,"{u'A': 2, u'B': 1, u'ZZZZZ': 1, u'SILENCE': 1,...",5
4,SALAMI_876,256.026122,Shuli Tang,0.858662,0.752330,1.000000,36.571843,7,"{u'a': 1, u'c': 1, u'b': 1, u'zzzzz': 1, u'c''...",7,42.667151,6,"{u'A': 1, u'C': 1, u'B': 2, u'SILENCE': 1, u'Z...",5
5,SALAMI_108,550.295510,Colin Hua,0.835007,0.716748,1.000000,32.366226,17,"{u'a': 1, u'c': 1, u'b': 1, u'e': 1, u'd': 1, ...",16,78.603693,7,"{u'A': 1, u'C': 1, u'B': 1, u'E': 1, u'D': 1, ...",6
6,SALAMI_108,550.295510,Eleni Vasilia Maltas,0.832459,0.713002,1.000000,20.381315,27,"{u'zzzzz': 1, u'j'': 1, u'h'': 1, u'end': 1, u...",27,61.143946,9,"{u'A': 1, u'YYYYY': 1, u'C': 1, u'B': 1, u'E':...",9
7,SALAMI_108,550.295510,John Turner,0.761616,0.635561,0.950043,28.962922,19,"{u'a': 1, u'c': 1, u'b': 1, u'e': 1, u'd': 1, ...",18,91.715918,6,"{u'A': 1, u'C': 1, u'B': 1, u'D': 1, u'A'': 1,...",6
8,SALAMI_108,550.295510,Evan S. Johnson,0.848784,0.737294,1.000000,23.925892,23,"{u'a': 1, u'c': 1, u'b': 1, u'e': 1, u'd': 1, ...",21,55.029551,10,"{u'A': 1, u'C': 1, u'B': 1, u'E': 1, u'D': 1, ...",8
9,SALAMI_108,550.295510,Shuli Tang,0.574174,0.499050,0.675924,50.022643,11,"{u'a': 1, u'c': 1, u'b': 1, u'e': 1, u'd': 1, ...",11,45.846349,12,"{u'A': 3, u'C': 2, u'B': 4, u'D': 1, u'ZZZZZ':...",6
