# JAMS Creation #

This notebook creates the JAMS files from the mp3 files (which carry metadata in their ID3 tags) and the raw beat/downbeat and segment annotations. Onset estimates computed with librosa are added as well.

In [1]:
import eyed3
import glob
import jams
import os
import pandas as pd
from tqdm import tqdm_notebook as tqdm
import pickle
import librosa

# Root folder of the dataset; the raw annotation folders and the MBID
# mapping table are all located relative to it.
DATASET_PATH = "../dataset/"
BEAT_DOWNBEAT_RAW_ANN_PATH = DATASET_PATH + "beats/"
SEGMENT_RAW_ANN_PATH = DATASET_PATH + "segments/"
# Audio files to convert (one JAMS file is written per mp3).
MP3_FILES = glob.glob("/Users/onieto/datasets/Harmonix/audio/*.mp3")
# Tab-separated table with (at least) "trackname" and "mbid" columns.
MBIDS_TSV = DATASET_PATH + "new_mbid_mappings.tsv"
# Folder where the resulting JAMS files are saved.
JAMS_OUTPUT = "./jams/"
# Number of decimal places kept for every time/duration written to JAMS.
ROUND_PRECISION = 3
# Default amount of audio (passed as `duration` to librosa.load) used to
# estimate onsets.
DUR_ONSETS = 30

# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(JAMS_OUTPUT, exist_ok=True)
MBIDS_DF = pd.read_csv(MBIDS_TSV, sep="\t")

In [2]:
import musicbrainzngs
import requests

# Register the application name, version and contact address that
# musicbrainzngs sends along with its web service requests.
musicbrainzngs.set_useragent("ISMIR_HARMONIX_DATASET", "0.1", contact="oriol.nieto@gmail.com")
def get_track(track_mbid):
    """Fetch the MusicBrainz recording associated with a track MBID.

    The track page on musicbrainz.org is downloaded and scanned for the
    first ``/recording/<id>`` link that follows the element whose ``id``
    attribute equals ``track_mbid``. That recording is then requested
    through ``musicbrainzngs``.

    Parameters
    ----------
    track_mbid : str
        MusicBrainz track identifier.

    Returns
    -------
    dict
        Whatever ``musicbrainzngs.get_recording_by_id`` returns when asked
        for the "artists" and "releases" includes.

    Raises
    ------
    musicbrainzngs.ResponseError
        If no recording link can be located in the downloaded page, or if
        the MusicBrainz web service rejects the recording request.
    """
    includes = ["artists", "releases"] # <= Modify this to get the info you need
    link_prefix = 'href="/recording/'
    mbid_length = 36  # Length of a textual MBID (UUID with dashes)
    timeout_secs = 30  # Do not let one stalled request block the whole run

    data = requests.get(
        "https://www.musicbrainz.org/track/{}".format(track_mbid),
        timeout=timeout_secs)
    webpage = data.content.decode('utf-8')

    # str.find returns -1 on failure; without these checks an arbitrary
    # slice of the page would be sent to MusicBrainz as a recording id.
    element_idx = webpage.find('id="{}"'.format(track_mbid))
    closer_idx = -1
    if element_idx != -1:
        closer_idx = webpage.find(link_prefix, element_idx)
    if closer_idx == -1:
        # Same exception type callers already handle for failed lookups.
        raise musicbrainzngs.ResponseError(
            message="No recording link found for track {}".format(track_mbid))

    id_start = closer_idx + len(link_prefix)
    recording_id = webpage[id_start:id_start + mbid_length]
    return musicbrainzngs.get_recording_by_id(recording_id, includes=includes)

In [13]:
def add_ann_metadata(ann):
    """Fill in the curator, version and corpus of an annotation (in place)."""
    metadata = ann.annotation_metadata
    curator = metadata.curator

    # Who curated the annotation
    curator.name = "Oriol Nieto"
    curator.email = "onieto@pandora.com"

    # Which release of which corpus it belongs to
    metadata.version = "1.0"
    metadata.corpus = "DATASET"  # TODO: FINAL NAME!
    
def add_metadata(jam, mp3_info, file_name):
    """Add file metadata and identifiers to the JAMS object.

    Artist, release and title come from the ID3 tag when all three are
    present. Otherwise they are looked up in MusicBrainz through the MBID
    mapped to this track in ``MBIDS_DF``, and left as empty strings when
    that lookup fails or returns an incomplete record.

    Parameters
    ----------
    jam : jams.JAMS
        Object whose ``file_metadata`` is filled in place.
    mp3_info
        Object returned by ``eyed3.load`` for the audio file.
    file_name : str
        Path of the mp3 file; its base name without ".mp3" is the track
        name searched in the "trackname" column of ``MBIDS_DF``.

    Raises
    ------
    IndexError
        If the mapped MBID is needed but ``MBIDS_DF`` has no row for the
        track.
    """
    id_name = os.path.basename(file_name).replace(".mp3", "")

    def mapped_mbid():
        # MBID listed for this track in the mapping table.
        return MBIDS_DF[MBIDS_DF["trackname"] == id_name]["mbid"].iloc[0]

    jam.file_metadata.duration = round(mp3_info.info.time_secs, ROUND_PRECISION)

    # `tag` may be None when the mp3 carries no ID3 tag at all; treat that
    # like missing fields instead of failing on the attribute access.
    tag = mp3_info.tag
    if tag is not None and tag.artist and tag.album and tag.title:
        artist, release, title = tag.artist, tag.album, tag.title
    else:
        # Look up in MusicBrainz
        mbid = mapped_mbid()
        try:
            recording = get_track(mbid)["recording"]
            # TODO: Which artist / release to get? So far, getting the first one, which should be the most relevant
            artist = recording["artist-credit"][0]["artist"]["name"]
            release = recording["release-list"][0]["title"]
            title = recording["title"]
        except (musicbrainzngs.ResponseError, KeyError, IndexError):
            # Failed lookup, or a recording without artist / release info:
            # keep the three fields consistent by blanking all of them.
            artist = release = title = ""
    jam.file_metadata.artist = artist
    jam.file_metadata.release = release
    jam.file_metadata.title = title

    # Identifiers: each one is optional in the ID3 tag. A missing frame (or
    # a missing tag) surfaces as an AttributeError on the None lookup result.
    jam.file_metadata.identifiers = {}
    try:
        jam.file_metadata.identifiers["Acoustid Id"] = \
            tag.user_text_frames.get(u"Acoustid Id").text
    except AttributeError:
        # TODO: Use pyacoustid?
        pass

    try:
        jam.file_metadata.identifiers["MusicBrainz Id"] = \
            tag.unique_file_ids.get("http://musicbrainz.org").uniq_id.decode('ascii')
    except AttributeError:
        # Not stored in the tag: fall back to the mapping table.
        jam.file_metadata.identifiers["MusicBrainz Id"] = mapped_mbid()

    try:
        # Not all of the tracks have an associated release
        jam.file_metadata.identifiers["MusicBrainz Release Track Id"] = \
            tag.user_text_frames.get(u"MusicBrainz Release Track Id").text
    except AttributeError:
        pass
    
def add_beats_and_downbeats(jam, beat_dnbeat_txt):
    """Add beat and downbeat annotations to the JAMS object.

    Parameters
    ----------
    jam : jams.JAMS
        Object that receives a new 'beat' annotation; its
        ``file_metadata.duration`` must already be set.
    beat_dnbeat_txt : str
        Path to a text file with one beat per line, made of three
        tab-separated fields: time, beat position and bar number.
        Blank lines are ignored.

    Raises
    ------
    ValueError
        If a non-blank line does not hold exactly three tab-separated
        fields, or its time / beat position is not numeric.
    """
    # Create annotation
    ann = jams.Annotation(namespace='beat', time=0,
                          duration=jam.file_metadata.duration)

    # Add annotations
    with open(beat_dnbeat_txt, "r") as f:
        for beat_row in f:
            if not beat_row.strip():
                # Tolerate empty lines (e.g. a trailing newline at the end)
                continue
            # The bar number is not stored: the observation value is the
            # beat position.
            time, beat_pos, _bar_num = beat_row.split('\t')
            ann.append(time=round(float(time), ROUND_PRECISION),
                       duration=0.0,
                       confidence=1,
                       value=int(beat_pos))

    # Add Annotation Metadata
    add_ann_metadata(ann)

    # Add annotation to the JAMS object
    jam.annotations.append(ann)
    
    
def add_segmentation(jam, segmentation_txt):
    """Add segmentation annotations to the JAMS object.

    Each non-blank line of the file is a boundary: a time followed by
    whitespace and the label of the segment starting there. Consecutive
    boundaries are paired into segments, so the last line only provides
    the end time of the final segment.

    Parameters
    ----------
    jam : jams.JAMS
        Object that receives a new 'segment_open' annotation; its
        ``file_metadata.duration`` must already be set.
    segmentation_txt : str
        Path to the boundary file.

    Raises
    ------
    ValueError
        If a boundary that starts a segment has no label, or a time is
        not numeric.
    """
    with open(segmentation_txt, "r") as f:
        # Split only at the first whitespace run so that labels containing
        # spaces, and repeated separators, do not break the parsing.
        boundaries = [line.strip().split(None, 1) for line in f if line.strip()]

    # Create annotation
    ann = jams.Annotation(namespace='segment_open', time=0,
                          duration=jam.file_metadata.duration)

    # Add annotations
    for (start_time, start_label), end in zip(boundaries[:-1], boundaries[1:]):
        begin = float(start_time)
        finish = float(end[0])  # Only the time of the next boundary matters
        ann.append(time=round(begin, ROUND_PRECISION),
                   duration=round(finish - begin, ROUND_PRECISION),
                   confidence=1,
                   value=start_label)

    # Add Annotation Metadata
    add_ann_metadata(ann)

    # Add annotation to the JAMS object
    jam.annotations.append(ann)
    
def add_onsets(jam, mp3_file, dur=DUR_ONSETS):
    """Add onsets estimations to the JAMS object.

    Parameters
    ----------
    jam : jams.JAMS
        Object that receives a new 'onset' annotation; its
        ``file_metadata.duration`` must already be set.
    mp3_file : str
        Path to the audio file to analyze.
    dur : float
        Value passed as ``duration`` to ``librosa.load``, i.e. how much
        audio from the start of the file is analyzed.
    """
    # Estimate onsets
    hop = 512
    y, sr = librosa.load(mp3_file, duration=dur)
    # The signal is passed by keyword: recent librosa releases reject a
    # positional `y`, while the keyword form works on older ones too.
    onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=hop)

    # Create annotation
    # NOTE(review): onsets are only estimated on the first `dur` of audio,
    # yet the annotation is declared to span the whole file -- confirm
    # this is intended.
    ann = jams.Annotation(namespace='onset', time=0,
                          duration=jam.file_metadata.duration)

    # Add annotations
    for onset_time in onset_times:
        ann.append(time=round(float(onset_time), ROUND_PRECISION),
                   duration=0,
                   confidence=1,
                   value=0)

    # Add Annotation Metadata
    add_ann_metadata(ann)
    ann.annotation_metadata.annotation_tools = "librosa {}".format(librosa.version.version)

    # Add annotation to the JAMS object
    jam.annotations.append(ann)

def create_jams(mp3_file):
    """Build the JAMS object (metadata, beats, segments, onsets) for an mp3."""
    # Both raw annotation files are named after the mp3 file
    ann_name = os.path.basename(mp3_file).replace(".mp3", ".txt")
    beats_path = BEAT_DOWNBEAT_RAW_ANN_PATH + ann_name
    segments_path = SEGMENT_RAW_ANN_PATH + ann_name

    # Start from an empty JAMS object and fill the metadata from the ID3 tags
    jam = jams.JAMS()
    add_metadata(jam, eyed3.load(mp3_file), mp3_file)

    # Attach the annotations: beats/downbeats, segments, estimated onsets
    add_beats_and_downbeats(jam, beats_path)
    add_segmentation(jam, segments_path)
    add_onsets(jam, mp3_file)

    return jam

In [None]:
# Convert every mp3 file into a JAMS file stored in the output folder
for mp3_file in tqdm(MP3_FILES):
    print(mp3_file)
    jams_name = os.path.basename(mp3_file).replace(".mp3", ".jams")
    out_file = JAMS_OUTPUT + jams_name
    create_jams(mp3_file).save(out_file)