# Differences in Hierarchical Annotations

Let's investigate the SALAMI annotations first. We will compare:

* Normalized conditional entropy (NCE) between the upper-level and lower-level segmentations of each track, computed separately for each annotator

In [53]:
from collections import namedtuple
import glob
import jams
import mir_eval
import numpy as np
import os
import pandas as pd

# A segmentation is a pair: its intervals (`inters`) and the label of each
# interval (`labels`), bundled in a lightweight immutable record.
Segmentation = namedtuple("Segmentation", "inters labels")

In [64]:
# Paths of every .jams file in the local copies of the SALAMI and SPAM datasets.
# NOTE: these are absolute, machine-specific paths; adjust them before running
# elsewhere. `glob.glob` returns an empty list (it does not raise) when nothing
# matches, so a wrong path only shows up later as "no files processed".
salami_jam_files = glob.glob("/home/uri/Projects/jams-data-salami/datasets/SALAMI/*.jams")
spam_jam_files = glob.glob("/home/uri/Projects/msaf-data/SPAM/references/*.jams")

In [66]:
def compute_metrics(upper, lower):
    """Computes the metrics for the given segmentations of a single annotator.

    Parameters
    ----------
    upper: Segmentation
        The upper segmentation of the annotator.
    lower: Segmentation
        The lower segmentation of the annotator.

    Returns
    -------
    res: dict
        Dictionary containing the following keys:
        - "S_o", "S_u", "S_f": the three values returned (in that order) by
          `mir_eval.segment.nce`, called with `upper` as its first
          (intervals, labels) pair and `lower` as its second. All three are
          None if that call raises ValueError.
        - "<layer>_nsegs": number of segments
        - "<layer>_nunique_labels": number of unique labels
        - "<layer>_mean_seg_dur": mean segment length (NaN for an empty layer)
        - "<layer>_nsegs_per_label": dict mapping each label to its number
          of segments
        where <layer> is "upper" or "lower".
    """
    def _layer_metrics(layer, prefix):
        """Returns the single-layer metrics, with every key starting with `prefix`."""
        durations = [inter[1] - inter[0] for inter in layer.inters]

        # A single pass yields both the distinct labels and their counts,
        # instead of re-scanning all labels once per distinct label.
        uniq_labels, counts = np.unique(layer.labels, return_counts=True)

        return {
            prefix + "nsegs": len(layer.labels),
            prefix + "nunique_labels": len(uniq_labels),
            # Guard the empty case explicitly: np.mean([]) also gives NaN,
            # but emits a RuntimeWarning while doing so.
            prefix + "mean_seg_dur": np.mean(durations) if durations else np.nan,
            # `.tolist()` turns NumPy scalars into native Python values, so
            # the dict's text form (e.g. when written to a TSV) does not
            # depend on how the installed NumPy prints its scalars.
            prefix + "nsegs_per_label": dict(zip(uniq_labels.tolist(),
                                                 counts.tolist())),
        }

    res = {}

    # Compute NCE; a ValueError from mir_eval is recorded as missing scores
    # rather than aborting the whole computation.
    try:
        res["S_o"], res["S_u"], res["S_f"] = \
            mir_eval.segment.nce(upper.inters, upper.labels,
                                 lower.inters, lower.labels)
    except ValueError:
        res["S_o"], res["S_u"], res["S_f"] = (None, None, None)

    # Upper metrics
    res.update(_layer_metrics(upper, "upper_"))

    # Lower metrics
    res.update(_layer_metrics(lower, "lower_"))

    return res

def process_jam(jam_file):
    """Processes a given jam, comparing the upper and lower segmentation
    layers of each of its annotators.

    The "segment_salami_upper" and "segment_salami_lower" annotations of the
    file are paired by position (the pairing stops at the shorter of the two
    lists), and each pair must come from the same annotator.

    Parameters
    ----------
    jam_file: str
        Path to the JAMS file to process.

    Returns
    -------
    all_res: list of dict
        One dictionary per (upper, lower) pair: the output of
        `compute_metrics` plus the keys "annotator_name", "track_dur" and
        "track_name" (the file name without its extension).

    Raises
    ------
    ValueError
        If a paired upper and lower annotation have different annotator names.
    """
    jam = jams.load(jam_file)
    track_name = os.path.splitext(os.path.basename(jam_file))[0]

    all_res = []
    for upper, lower in zip(jam.search(namespace="segment_salami_upper"),
                            jam.search(namespace="segment_salami_lower")):
        # Make sure we are dealing with the same annotator. This is an
        # explicit check (not an `assert`) so it still runs under `python -O`.
        annotator_name = upper.annotation_metadata.annotator.name
        lower_annotator_name = lower.annotation_metadata.annotator.name
        if annotator_name != lower_annotator_name:
            raise ValueError(
                "Annotator mismatch in %s: upper layer by %r, lower layer by %r"
                % (jam_file, annotator_name, lower_annotator_name))

        # Get actual annotations
        upper_seg = Segmentation(*upper.data.to_interval_values())
        lower_seg = Segmentation(*lower.data.to_interval_values())

        # Compute all metrics
        res = compute_metrics(upper_seg, lower_seg)

        # Store additional info
        res["annotator_name"] = annotator_name
        res["track_dur"] = jam.file_metadata.duration
        res["track_name"] = track_name
        all_res.append(res)
    return all_res

In [62]:
def compute_all_jams(jam_files):
    """Computes the annotator metrics of every given JAMS file.

    Parameters
    ----------
    jam_files: list of str
        Paths to the JAMS files to process.

    Returns
    -------
    all_df: pd.DataFrame
        One row per result returned by `process_jam`, with the columns in a
        fixed order. If no results were produced (e.g. `jam_files` is empty),
        the frame has no rows but still has all the columns.
    """
    columns = ["track_name", "track_dur", "annotator_name",
               "S_f", "S_o", "S_u",
               "lower_mean_seg_dur", "lower_nsegs",
               "lower_nsegs_per_label", "lower_nunique_labels",
               "upper_mean_seg_dur", "upper_nsegs",
               "upper_nsegs_per_label", "upper_nunique_labels"]

    all_data = []
    for jam_file in jam_files:
        all_data.extend(process_jam(jam_file))

    # Selecting the columns in the constructor (instead of indexing the frame
    # afterwards) keeps the same order while also working when `all_data` is
    # empty, where indexing a column-less frame would raise KeyError.
    return pd.DataFrame(all_data, columns=columns)

In [59]:
# Compute all the salami metrics (one row per annotator of each track) and
# save them as a tab-separated file in the current working directory.
salami_df = compute_all_jams(salami_jam_files)
salami_df.to_csv("salami_annotator_metrics.tsv", sep="\t")

In [67]:
# Compute all the spam metrics (one row per annotator of each track) and
# save them as a tab-separated file in the current working directory.
spam_df = compute_all_jams(spam_jam_files)
spam_df.to_csv("spam_annotator_metrics.tsv", sep="\t")