# JAMS Creation #

This notebook creates the JAMS data given the mp3 files (containing metadata in their ID3 tags) and the actual raw annotations.

In [3]:
import eyed3
import glob
import jams
import os
import pandas as pd
from tqdm import tqdm_notebook as tqdm
import pickle
import librosa

DATASET_PATH = "../dataset/"
BEAT_DOWNBEAT_RAW_ANN_PATH = DATASET_PATH + "beats/"
SEGMENT_RAW_ANN_PATH = DATASET_PATH + "segments/"
MP3_FILES = glob.glob("/Users/onieto/Desktop/Harmonix/final_audio/*.mp3")
MBIDS_TSV = "../dataset/new_mbid_mappings.tsv"
JAMS_OUTPUT = "./jams/"
ROUND_PRECISION = 3
DUR_ONSETS = 30

if not os.path.exists(JAMS_OUTPUT):
    os.makedirs(JAMS_OUTPUT)
MBIDS_DF = pd.read_csv(MBIDS_TSV, sep="\t")

In [20]:
print(MBIDS_DF[MBIDS_DF["trackname"] == "0001_12step"]["mbid"][0])
MBIDS_DF.head()

0408655f-189f-371b-9c41-ec861e1a7810


Unnamed: 0.1,Unnamed: 0,trackname,mbid
0,0,0001_12step,0408655f-189f-371b-9c41-ec861e1a7810
1,1,0003_6foot7foot,83347ae2-5def-378a-a3f5-96ec56c25ab7
2,2,0004_abc,1068e183-db62-4f47-8f7d-15c9ce56e79a
3,3,0005_again,09aed1ac-4094-3337-86ef-8303531d57f1
4,4,0006_aint2proud2beg,09723bc0-b3e9-4f86-a563-c80d25df049e


In [25]:
def add_ann_metadata(ann):
    """Adds annotation metadata"""
    ann.annotation_metadata.curator.name = "Oriol Nieto"
    ann.annotation_metadata.curator.email = "onieto@pandora.com"
    ann.annotation_metadata.version = "1.0"
    ann.annotation_metadata.corpus = "DATASET"  # TODO: FINAL NAME!
    
def add_metadata(jam, mp3_info, file_name):
    """Add metadata to the JAMS object."""
    id_name = os.path.basename(file_name).replace(".mp3", "")
    print(id_name)
    print(mp3_info)
    jam.file_metadata.duration = round(mp3_info.info.time_secs, ROUND_PRECISION)
    jam.file_metadata.artist = mp3_info.tag.artist
    jam.file_metadata.release = mp3_info.tag.album
    jam.file_metadata.title = mp3_info.tag.title
    
    jam.file_metadata.identifiers = {
        "Acoustid Id": mp3_info.tag.user_text_frames.get(u"Acoustid Id").text}
    
    jam.file_metadata.identifiers["MusicBrainz Id"] = \
        mp3_info.tag.unique_file_ids.get("http://musicbrainz.org").uniq_id.decode('ascii')
    try:
        # Not all of the tracks have an associated release
        jam.file_metadata.identifiers["MusicBrainz Release Track Id"] = \
            mp3_info.tag.user_text_frames.get(u"MusicBrainz Release Track Id").text
    except AttributeError:
        pass
    
def add_beats_and_downbeats(jam, beat_dnbeat_txt):
    """Add beat and downbeat annotations to the JAMS object."""
    with open(beat_dnbeat_txt, "r") as f:
        beat_dnbeat_data = f.readlines()
    
    # Create annotation
    ann = jams.Annotation(namespace='beat', time=0, 
                          duration=jam.file_metadata.duration)
    
    # Add annotations
    for beat_row in beat_dnbeat_data:
        time, beat_pos, bar_num = beat_row.split('\t')
        ann.append(time=round(float(time), ROUND_PRECISION), 
                   duration=0.0, 
                   confidence=1, 
                   value=int(beat_pos))
    
    # TODO: Add Annotation Metadata
    add_ann_metadata(ann)
    
    # Add annotation to 
    jam.annotations.append(ann)
    
    
def add_segmentation(jam, segmentation_txt):
    """Add segmentation annotations to the JAMS object."""
    with open(segmentation_txt, "r") as f:
        segment_data = f.readlines()
    
    # Create annotation
    ann = jams.Annotation(namespace='segment_open', time=0, 
                          duration=jam.file_metadata.duration)
    
    # Add annotations
    for start, end in zip(segment_data[:-1], segment_data[1:]):
        start_time, start_label = start.replace('\n', '').split(' ')
        end_time, _ = end.replace('\n', '').split(' ')
        ann.append(time=round(float(start_time), ROUND_PRECISION), 
                   duration=round(float(end_time) - float(start_time), ROUND_PRECISION), 
                   confidence=1, 
                   value=start_label)
    
    # TODO: Add Annotation Metadata
    add_ann_metadata(ann)
    
    # Add annotation to 
    jam.annotations.append(ann)
    
def add_onsets(jam, mp3_file, dur=DUR_ONSETS):
    """Add onsets estimations to the JAMS object."""
    # Estimate onsets
    hop = 512
    y, sr = librosa.load(mp3_file, duration=dur)
    onset_frames = librosa.onset.onset_detect(y, sr=sr, hop_length=hop)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=hop)
    
    # Create annotation
    ann = jams.Annotation(namespace='onset', time=0, 
                          duration=jam.file_metadata.duration)
    
    # Add annotations
    for onset_time in onset_times:
        ann.append(time=round(float(onset_time), ROUND_PRECISION), 
                   duration=0, 
                   confidence=1, 
                   value=0)
    
    # Add Annotation Metadata
    add_ann_metadata(ann)
    ann.annotation_metadata.annotation_tools = "librosa {}".format(librosa.version.version)
    
    # Add annotation to 
    jam.annotations.append(ann)

def create_jams(mp3_file):
    """Creates the JAMS object"""
    beat_dnbeat_txt = "{}{}".format(
        BEAT_DOWNBEAT_RAW_ANN_PATH, 
        os.path.basename(mp3_file).replace(".mp3", ".txt"))
    segment_txt = "{}{}".format(
        SEGMENT_RAW_ANN_PATH, 
        os.path.basename(mp3_file).replace(".mp3", ".txt"))
    
    # Read ID3 tags
    mp3_info = eyed3.load(mp3_file)
    
    # Create empty JAMS object and add file metadata
    jam = jams.JAMS()
    add_metadata(jam, mp3_info, mp3_file)
    
    # Add annotations
    add_beats_and_downbeats(jam, beat_dnbeat_txt)
    add_segmentation(jam, segment_txt)
    add_onsets(jam, mp3_file)
    
    # Add the beats info
    return jam

In [27]:
for mp3_file in tqdm(MP3_FILES):
    out_file = "{}{}".format(JAMS_OUTPUT, os.path.basename(mp3_file).replace(".mp3", ".jams"))
    print(mp3_file)
    jam = create_jams(mp3_file)
    jam.save(out_file)
    break

HBox(children=(IntProgress(value=0, max=912), HTML(value='')))

GEOB frame does not contain a mime type
GEOB frame does not contain a valid mime type


/Users/onieto/Desktop/Harmonix/final_audio/0001_12step.mp3
0001_12step
<eyed3.mp3.Mp3AudioFile object at 0x115fae2e8>



AttributeError: 'NoneType' object has no attribute 'text'

In [7]:
MP3_FILES[0]

'/Users/onieto/Desktop/Harmonix/final_audio/0004_abc.mp3'

In [14]:
import librosa



array([ 0.06965986,  0.30185941,  0.69659864,  0.90557823,  1.021678  ,
        1.36997732,  1.60217687,  1.69505669,  2.04335601,  2.36843537,
        2.5077551 ,  2.92571429,  3.27401361,  3.62231293,  3.80807256,
        4.15637188,  4.29569161,  4.59755102,  4.96907029,  5.13160998,
        5.61922902,  5.92108844,  6.26938776,  6.43192744,  6.7570068 ,
        6.91954649,  7.24462585,  7.56970522,  7.75546485,  7.91800454,
        8.2430839 ,  8.54494331,  8.87002268,  9.03256236,  9.19510204,
        9.38086168,  9.49696145,  9.82204082, 10.14712018, 10.30965986,
       10.47219955, 10.65795918, 10.79727891, 10.98303855, 11.09913832,
       11.42421769, 11.58675737, 11.74929705, 11.91183673, 12.05115646,
       12.37623583, 12.70131519, 12.86385488, 13.02639456, 13.2121542 ,
       13.35147392, 13.65333333, 13.9784127 , 14.14095238, 14.30349206,
       14.46603175, 14.62857143, 14.81433107, 14.93043084, 15.11619048,
       15.2555102 , 15.41804989, 15.58058957, 15.76634921, 15.90

In [12]:
x

array([   3,   13,   30,   39,   44,   59,   69,   73,   88,  102,  108,
        126,  141,  156,  164,  179,  185,  198,  214,  221,  242,  256,
        270,  277,  291,  298,  312,  326,  334,  341,  355,  368,  382,
        389,  396,  404,  409,  423,  437,  444,  451,  459,  465,  473,
        478,  492,  499,  506,  513,  520,  533,  547,  554,  561,  569,
        575,  588,  602,  609,  616,  624,  630,  638,  643,  651,  657,
        664,  671,  679,  685,  698,  713,  719,  726,  733,  739,  747,
        752,  760,  766,  773,  781,  788,  794,  807,  821,  828,  849,
        862,  871,  877,  884,  891,  899,  904,  917,  932,  938,  946,
        959,  972,  981,  987,  994, 1015, 1027, 1042, 1048, 1056, 1063,
       1069, 1082, 1096, 1124, 1137, 1153, 1159, 1167, 1174, 1180, 1187,
       1193, 1207, 1214, 1235, 1248, 1263, 1269, 1276, 1283, 1289, 1296,
       1302, 1311, 1318, 1324, 1343, 1356, 1372, 1379, 1385, 1392, 1399,
       1407, 1412, 1420, 1426, 1432, 1434, 1454, 14