# JAMS Creation #

This notebook creates the JAMS data given the mp3 files (containing metadata in their ID3 tags) and the actual raw annotations.

In [28]:
import eyed3
import glob
import jams
import os
import pandas as pd
from tqdm import tqdm_notebook as tqdm
import pickle
import librosa

# Locations of the raw annotations and of the audio files
DATASET_PATH = "../dataset/"
BEAT_DOWNBEAT_RAW_ANN_PATH = DATASET_PATH + "beats_and_downbeats/"
SEGMENT_RAW_ANN_PATH = DATASET_PATH + "segments/"
# glob gives no ordering guarantee; sort so every run processes files in the same order
MP3_FILES = sorted(glob.glob("/Users/onieto/datasets/Harmonix/audio/*.mp3"))
# NOTE: despite the name, this file is read as comma-separated below
METADATA_TSV = "../dataset/metadata.csv"
JAMS_OUTPUT = "./jams_new/"
ROUND_PRECISION = 3  # Number of decimals kept for times and durations
DUR_ONSETS = 30      # Default `dur` of add_onsets, passed to librosa.load(duration=...)
VERSION = "1.1"      # Stored in each annotation's metadata

# Remove warnings for MP3 tags reader
eyed3.log.setLevel("ERROR")

# exist_ok avoids the check-then-create race of a separate os.path.exists test
os.makedirs(JAMS_OUTPUT, exist_ok=True)
METADATA_DF = pd.read_csv(METADATA_TSV, sep=",")

In [29]:
import musicbrainzngs
import requests

# Register the user agent (application name, version, contact) used by musicbrainzngs requests
musicbrainzngs.set_useragent("ISMIR_HARMONIX_DATASET", "0.1", contact="oriol.nieto@gmail.com")
def get_track(track_mbid):
    """Fetch the MusicBrainz recording associated with a MusicBrainz track id.

    The track page on musicbrainz.org is downloaded and scanned for the first
    ``/recording/<id>`` link that follows the element whose ``id`` attribute
    equals the track id. That recording is then requested through
    ``musicbrainzngs`` together with its artists and releases.

    Parameters
    ----------
    track_mbid : str
        MusicBrainz track identifier.

    Returns
    -------
    The result of ``musicbrainzngs.get_recording_by_id``.

    Raises
    ------
    musicbrainzngs.ResponseError
        If no recording link can be located on the track page (previously a
        garbage id was sliced out of the page and sent to the web service).
    """
    includes = ["artists", "releases"]  # <= Modify this to get the info you need
    link_prefix = 'href="/recording/'
    mbid_length = 36  # Length of the textual id that follows the link prefix

    # A timeout keeps a stalled connection from blocking the whole run forever
    response = requests.get(
        "https://www.musicbrainz.org/track/{}".format(track_mbid), timeout=30)
    webpage = response.content.decode('utf-8')

    # str.find returns -1 when nothing matches; never slice with that value
    element_idx = webpage.find('id="{}"'.format(track_mbid))
    link_idx = webpage.find(link_prefix, element_idx) if element_idx != -1 else -1
    if link_idx == -1:
        raise musicbrainzngs.ResponseError(
            message="No recording found for track {}".format(track_mbid))

    id_start = link_idx + len(link_prefix)
    recording_id = webpage[id_start:id_start + mbid_length]
    return musicbrainzngs.get_recording_by_id(recording_id, includes=includes)

In [25]:
def add_ann_metadata(ann):
    """Stamp an annotation with the curator, version and corpus details."""
    metadata = ann.annotation_metadata
    curator = metadata.curator
    curator.name = "Oriol Nieto"
    curator.email = "onieto@pandora.com"
    metadata.version = VERSION
    metadata.corpus = "Harmonix"
    
def _nested_get(data, *path):
    """Follow ``path`` through nested dicts/lists; return "" if any step is missing."""
    for step in path:
        try:
            data = data[step]
        except (KeyError, IndexError, TypeError):
            return ""
    return data


def _metadata_mbid(id_name):
    """Return the "MusicBrainz Id" that METADATA_DF lists for ``id_name``, or None."""
    matches = METADATA_DF.loc[METADATA_DF["File"] == id_name, "MusicBrainz Id"]
    if matches.empty or pd.isna(matches.iloc[0]):
        return None
    return matches.iloc[0]


def add_metadata(jam, mp3_info, file_name):
    """Add file metadata (duration, artist, release, title, ids) to the JAMS object.

    Artist, release and title come from the ID3 tag when all three are
    present. Otherwise the track is looked up on MusicBrainz using the id
    listed in METADATA_DF, and any field that cannot be resolved is set to "".

    Parameters
    ----------
    jam : jams.JAMS
        Object whose ``file_metadata`` is filled in place.
    mp3_info : object returned by ``eyed3.load``
        Provides ``info.time_secs`` and the (possibly missing) ``tag``.
    file_name : str
        Path of the mp3; its base name without ".mp3" is the key into the
        "File" column of METADATA_DF.
    """
    id_name = os.path.basename(file_name).replace(".mp3", "")
    file_metadata = jam.file_metadata
    tag = mp3_info.tag  # Treated as possibly None (file without a readable tag)
    file_metadata.duration = round(mp3_info.info.time_secs, ROUND_PRECISION)

    if tag is not None and tag.artist and tag.album and tag.title:
        file_metadata.artist = tag.artist
        file_metadata.release = tag.album
        file_metadata.title = tag.title
    else:
        # Look up in MusicBrainz
        mbid = _metadata_mbid(id_name)
        print(mbid)
        recording = {}
        if mbid is not None:
            try:
                recording = get_track(mbid)["recording"]
            except (musicbrainzngs.ResponseError, KeyError):
                recording = {}
        # TODO: Which artist / release to get? So far, getting the first one,
        # which should be the most relevant. A recording without releases or
        # artist credits yields "" for that field instead of raising.
        file_metadata.artist = _nested_get(
            recording, "artist-credit", 0, "artist", "name")
        file_metadata.release = _nested_get(recording, "release-list", 0, "title")
        file_metadata.title = _nested_get(recording, "title")

    file_metadata.identifiers = {}
    try:
        file_metadata.identifiers["Acoustid Id"] = \
            tag.user_text_frames.get(u"Acoustid Id").text
    except AttributeError:
        # Tag or frame missing: leave the identifier out
        # TODO: Use pyacoustid?
        pass

    try:
        file_metadata.identifiers["MusicBrainz Id"] = \
            tag.unique_file_ids.get("http://musicbrainz.org").uniq_id.decode('ascii')
    except AttributeError:
        # Not in the tag: fall back to the id listed in the metadata file, if any
        mbid = _metadata_mbid(id_name)
        if mbid is not None:
            file_metadata.identifiers["MusicBrainz Id"] = mbid

    try:
        # Not all of the tracks have an associated release
        file_metadata.identifiers["MusicBrainz Release Track Id"] = \
            tag.user_text_frames.get(u"MusicBrainz Release Track Id").text
    except AttributeError:
        pass
    
def add_beats_and_downbeats(jam, beat_dnbeat_txt):
    """Add beat and downbeat annotations to the JAMS object.

    Parameters
    ----------
    jam : jams.JAMS
        Object receiving a new 'beat' annotation; its file duration must
        already be set.
    beat_dnbeat_txt : str
        Path to a text file with one beat per row, holding three
        whitespace-separated fields: time, beat position and bar number.

    Raises
    ------
    ValueError
        If a non-blank row does not hold exactly three fields, or its time /
        beat position cannot be parsed as a number.
    """
    # Create annotation
    ann = jams.Annotation(namespace='beat', time=0,
                          duration=jam.file_metadata.duration)

    # Add annotations
    with open(beat_dnbeat_txt, "r") as f:
        for beat_row in f:
            fields = beat_row.split()
            if not fields:
                # Blank rows (e.g. a trailing newline) carry no beat
                continue
            # The bar number is read to validate the row shape but is not stored
            beat_time, beat_pos, _bar_num = fields
            ann.append(time=round(float(beat_time), ROUND_PRECISION),
                       duration=0.0,
                       confidence=1,
                       value=int(beat_pos))

    # Add Annotation Metadata
    add_ann_metadata(ann)

    # Add annotation to the JAMS object
    jam.annotations.append(ann)
    
    
def add_segmentation(jam, segmentation_txt):
    """Add segmentation annotations to the JAMS object.

    Each row of the file holds a boundary time followed by a label. Every
    pair of consecutive rows becomes one segment that starts at the first
    row's time, ends at the second row's time and carries the first row's
    label; the label of the last row is therefore never used.

    Parameters
    ----------
    jam : jams.JAMS
        Object receiving a new 'segment_open' annotation; its file duration
        must already be set.
    segmentation_txt : str
        Path to the raw segmentation text file.

    Raises
    ------
    ValueError
        If a non-blank row lacks a label or its time is not a number.
    """
    # Blank rows are ignored; splitting once on any whitespace run tolerates
    # repeated spaces and keeps line endings out of the label
    with open(segmentation_txt, "r") as f:
        rows = [line.strip().split(None, 1) for line in f if line.strip()]

    # Create annotation
    ann = jams.Annotation(namespace='segment_open', time=0,
                          duration=jam.file_metadata.duration)

    # Add annotations
    for (start_time, start_label), (end_time, _) in zip(rows[:-1], rows[1:]):
        ann.append(time=round(float(start_time), ROUND_PRECISION),
                   duration=round(float(end_time) - float(start_time), ROUND_PRECISION),
                   confidence=1,
                   value=start_label)

    # Add Annotation Metadata
    add_ann_metadata(ann)

    # Add annotation to the JAMS object
    jam.annotations.append(ann)
    
def add_onsets(jam, mp3_file, dur=DUR_ONSETS):
    """Add onsets estimations to the JAMS object.

    Parameters
    ----------
    jam : jams.JAMS
        Object receiving a new 'onset' annotation; its file duration must
        already be set.
    mp3_file : str
        Path of the audio file to analyse.
    dur : float
        Passed to ``librosa.load`` as ``duration``, so only that much audio
        is loaded and analysed.
    """
    # Estimate onsets
    hop = 512
    y, sr = librosa.load(mp3_file, duration=dur)
    # The audio is passed by keyword: recent librosa releases reject it positionally
    onset_frames = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=hop)

    # Create annotation
    # NOTE(review): the annotation spans the whole file although onsets are only
    # estimated on the loaded excerpt -- confirm this is intended
    ann = jams.Annotation(namespace='onset', time=0,
                          duration=jam.file_metadata.duration)

    # Add annotations
    for onset_time in onset_times:
        ann.append(time=round(float(onset_time), ROUND_PRECISION),
                   duration=0,
                   confidence=1,
                   value=0)

    # Add Annotation Metadata
    add_ann_metadata(ann)
    ann.annotation_metadata.annotation_tools = "librosa {}".format(librosa.__version__)

    # Add annotation to the JAMS object
    jam.annotations.append(ann)

def create_jams(mp3_file):
    """Build the JAMS object (file metadata plus all annotations) for one mp3."""
    # Both raw annotation files share the mp3's base name, with a .txt extension
    ann_name = os.path.basename(mp3_file).replace(".mp3", ".txt")
    beat_dnbeat_txt = BEAT_DOWNBEAT_RAW_ANN_PATH + ann_name
    segment_txt = SEGMENT_RAW_ANN_PATH + ann_name

    # ID3 tags are the primary source of the file metadata
    id3_tags = eyed3.load(mp3_file)

    # Start from an empty JAMS and fill in the file metadata
    jam = jams.JAMS()
    add_metadata(jam, id3_tags, mp3_file)

    # Annotations: beats/downbeats, segments, then estimated onsets
    add_beats_and_downbeats(jam, beat_dnbeat_txt)
    add_segmentation(jam, segment_txt)
    add_onsets(jam, mp3_file)

    return jam

In [None]:
# Write one .jams file per mp3 into JAMS_OUTPUT
for mp3_file in tqdm(MP3_FILES):
    print(mp3_file)
    jams_name = os.path.basename(mp3_file).replace(".mp3", ".jams")
    out_file = JAMS_OUTPUT + jams_name
    jam = create_jams(mp3_file)
    jam.save(out_file)

HBox(children=(IntProgress(value=0, max=912), HTML(value='')))

/Users/onieto/datasets/Harmonix/audio/0001_12step.mp3
0408655f-189f-371b-9c41-ec861e1a7810
/Users/onieto/datasets/Harmonix/audio/0003_6foot7foot.mp3
83347ae2-5def-378a-a3f5-96ec56c25ab7
/Users/onieto/datasets/Harmonix/audio/0004_abc.mp3
/Users/onieto/datasets/Harmonix/audio/0005_again.mp3
09aed1ac-4094-3337-86ef-8303531d57f1
/Users/onieto/datasets/Harmonix/audio/0006_aint2proud2beg.mp3
09723bc0-b3e9-4f86-a563-c80d25df049e
/Users/onieto/datasets/Harmonix/audio/0008_america.mp3
/Users/onieto/datasets/Harmonix/audio/0009_americanmusic.mp3
/Users/onieto/datasets/Harmonix/audio/0010_andjusticeforall.mp3
/Users/onieto/datasets/Harmonix/audio/0011_areyouexperienced.mp3
8418c5f0-3649-4cd1-972e-613be797a141
/Users/onieto/datasets/Harmonix/audio/0012_aroundtheworld.mp3
6e87e720-67f1-35bc-923b-6e5f642610b3
/Users/onieto/datasets/Harmonix/audio/0013_athingaboutyou.mp3
/Users/onieto/datasets/Harmonix/audio/0014_babaoriley.mp3
/Users/onieto/datasets/Harmonix/audio/0015_babygotback.mp3
9fdde642-2f7a-

/Users/onieto/datasets/Harmonix/audio/0116_goodies.mp3
1786078c-b374-399d-bae6-47dd0d9a131a
/Users/onieto/datasets/Harmonix/audio/0117_goodmorningblackfriday.mp3
/Users/onieto/datasets/Harmonix/audio/0118_grenade.mp3
031f9780-e7ca-37cc-9472-8b8896a397a3
/Users/onieto/datasets/Harmonix/audio/0119_gunpowderandlead.mp3
/Users/onieto/datasets/Harmonix/audio/0120_hallowedbethyname.mp3
0622ee9a-edc3-42ae-8359-60a6866f2942
/Users/onieto/datasets/Harmonix/audio/0122_heardemall.mp3
39635547-a1ca-3b38-8d6d-f223b25d301e
/Users/onieto/datasets/Harmonix/audio/0123_heavyduty.mp3
/Users/onieto/datasets/Harmonix/audio/0124_hello.mp3
/Users/onieto/datasets/Harmonix/audio/0125_hellogoodmorning.mp3
349a6c39-2214-3a16-a41c-310c26531254
/Users/onieto/datasets/Harmonix/audio/0126_heymami.mp3
075b9864-8a6a-358c-bc5e-a435d7caf026
/Users/onieto/datasets/Harmonix/audio/0127_hollabackgirl.mp3
04af5a43-6c05-4e25-9329-19e880c10c22
/Users/onieto/datasets/Harmonix/audio/0128_homecoming.mp3
/Users/onieto/datasets/Har

/Users/onieto/datasets/Harmonix/audio/0239_sandm.mp3
045b9acc-628e-46a4-8743-c6d2f0b0e735
/Users/onieto/datasets/Harmonix/audio/0240_sandstorm.mp3
33a19e07-b41a-4641-93e4-afdf43d4876f
/Users/onieto/datasets/Harmonix/audio/0241_satellite.mp3
/Users/onieto/datasets/Harmonix/audio/0242_satisfaction.mp3
016208a5-8d35-30d1-8ef7-1c68ba4f2569
/Users/onieto/datasets/Harmonix/audio/0243_saucyjack.mp3
/Users/onieto/datasets/Harmonix/audio/0244_sayaah.mp3
1b58415f-01e2-32f5-a098-824eb73d5c28
/Users/onieto/datasets/Harmonix/audio/0245_sayhey.mp3
0c2bf8cb-4f5c-32fd-9029-e089d6d77bec
/Users/onieto/datasets/Harmonix/audio/0246_scenario.mp3
/Users/onieto/datasets/Harmonix/audio/0247_scream.mp3
/Users/onieto/datasets/Harmonix/audio/0248_screamingfor.mp3
/Users/onieto/datasets/Harmonix/audio/0249_sensualseduction.mp3
/Users/onieto/datasets/Harmonix/audio/0250_sexyandiknowit.mp3
0cff2b87-7971-486b-8480-ebab07e8ce95
/Users/onieto/datasets/Harmonix/audio/0251_sexychick.mp3
13ffa917-1cca-33fd-9392-cf076ba79

/Users/onieto/datasets/Harmonix/audio/0355_brokenhearted.mp3
/Users/onieto/datasets/Harmonix/audio/0356_californiagurls.mp3
/Users/onieto/datasets/Harmonix/audio/0357_cheers.mp3
/Users/onieto/datasets/Harmonix/audio/0358_children.mp3
/Users/onieto/datasets/Harmonix/audio/0359_cominghome.mp3
/Users/onieto/datasets/Harmonix/audio/0360_coolerthanme.mp3
/Users/onieto/datasets/Harmonix/audio/0361_countrymustbecountrywide.mp3
/Users/onieto/datasets/Harmonix/audio/0362_crazygirl.mp3
/Users/onieto/datasets/Harmonix/audio/0363_darkside.mp3
/Users/onieto/datasets/Harmonix/audio/0364_deuces.mp3
/Users/onieto/datasets/Harmonix/audio/0365_dirtroadanthem.mp3
/Users/onieto/datasets/Harmonix/audio/0366_domino.mp3
/Users/onieto/datasets/Harmonix/audio/0367_dontdreamitsover.mp3
/Users/onieto/datasets/Harmonix/audio/0368_dontwakemeup.mp3
/Users/onieto/datasets/Harmonix/audio/0369_dontyouworrychild.mp3
/Users/onieto/datasets/Harmonix/audio/0370_downonme.mp3
/Users/onieto/datasets/Harmonix/audio/0371_downu

/Users/onieto/datasets/Harmonix/audio/0495_someonelikeyou.mp3
/Users/onieto/datasets/Harmonix/audio/0496_sparksfly.mp3
/Users/onieto/datasets/Harmonix/audio/0497_speakers.mp3
/Users/onieto/datasets/Harmonix/audio/0498_springsteen.mp3
/Users/onieto/datasets/Harmonix/audio/0499_starbuckssmile.mp3
/Users/onieto/datasets/Harmonix/audio/0500_stay.mp3
/Users/onieto/datasets/Harmonix/audio/0501_stayawhile.mp3
/Users/onieto/datasets/Harmonix/audio/0502_stronger.mp3
/Users/onieto/datasets/Harmonix/audio/0503_stucklikeglue.mp3
/Users/onieto/datasets/Harmonix/audio/0504_suspiciousminds.mp3
/Users/onieto/datasets/Harmonix/audio/0505_takeitoff.mp3
/Users/onieto/datasets/Harmonix/audio/0506_takemehometonight.mp3
/Users/onieto/datasets/Harmonix/audio/0507_teenagedream.mp3
/Users/onieto/datasets/Harmonix/audio/0508_tennesseeme.mp3
/Users/onieto/datasets/Harmonix/audio/0509_thelazysong.mp3
/Users/onieto/datasets/Harmonix/audio/0510_theonethatgotaway.mp3
/Users/onieto/datasets/Harmonix/audio/0511_theonl

In [None]:
# TODO:
# Update version number