In [185]:
import mido
import os
import pandas as pd
import numpy as np
from aaapi import MidiUtils

In [186]:
midi_dir = '../dataset/midi'
midi_files = os.listdir(midi_dir)

In [187]:
destination_dir = '../dataset/midi_by_tracks'

In [188]:
def is_valid(track):
    expecting_on = True
    for msg in track:
        if 'note' in msg.type:
            if (expecting_on and msg.type == 'note_on') or (not expecting_on and msg.type == 'note_off'):
                expecting_on = not expecting_on
            else:
                return False
    return True

In [189]:
def has_note(track):
    for msg in track:
        if 'note' in msg.type:
            return True
    return False

def get_notes(track):
    added_rests = 0
    msgs = list()
    for msg in track:
        if 'note' in msg.type:
            if added_rests:
                msg.time += added_rests
                added_rests = 0
            msgs += [msg]
        else:
            added_rests += msg.time
    return msgs

def get_tempo(track):
    for msg in track:
        if 'tempo' in msg.type:
            return msg.tempo
    return None

def convert_time_to_millisecs(track, ticks, tempo):
    for msg in track:
        if 'note' in msg.type:
            new_time = round(mido.tick2second(msg.time, ticks, tempo) * 1000)
            msg.time = new_time
    return track

def trim_first_rest(track):
    for msg in track:
        if msg.type == 'note_on':
            msg.time = 0
            return track
    return track

In [190]:
for file in midi_files:
    midi = mido.MidiFile(os.path.join(midi_dir, file))
    try:
        os.mkdir(os.path.join(destination_dir, file))
        print('Created dir')
    except FileExistsError as e:
        print('Content already exist')
        continue
    
    try:
        tempo = max([j for j in [get_tempo(t) for t in midi.tracks] if j is not None])
    except ValueError as e:
        tempo = -1
    ticks = midi.ticks_per_beat
    
    print("tempo: {}, ticks: {}".format(tempo, ticks))
    
    note_tracks = [t for t in midi.tracks if has_note(t)]

    if tempo > 0:
        print("Correcting tempo to milliseconds")
        note_tracks = [convert_time_to_millisecs(t, ticks, tempo) for t in note_tracks]
    for i, track in enumerate(note_tracks):
        valid = is_valid(track)
        track = mido.MidiTrack(track)
        new_midi_name = os.path.join(destination_dir, file, 'track{}-{}.mid'.format(i, 'single' if valid else 'poly'))
        new_midi = mido.MidiFile()
        new_midi.tracks = [track]
        new_midi.save(new_midi_name)
        

Created dir
tempo: 600000, ticks: 120
Correcting tempo to milliseconds
Content already exist
Created dir
tempo: 625000, ticks: 120
Correcting tempo to milliseconds
Content already exist
Created dir
tempo: 500000, ticks: 480
Correcting tempo to milliseconds
Content already exist
Created dir
tempo: 400000, ticks: 480
Correcting tempo to milliseconds
Content already exist
Created dir
tempo: 600000, ticks: 120
Correcting tempo to milliseconds
Content already exist
Content already exist
Created dir
tempo: 869565, ticks: 192
Correcting tempo to milliseconds
Created dir
tempo: 600000, ticks: 120
Correcting tempo to milliseconds
Content already exist
Created dir
tempo: 666666, ticks: 480
Correcting tempo to milliseconds
Content already exist
Created dir
tempo: 413793, ticks: 96
Correcting tempo to milliseconds
Created dir
tempo: 500000, ticks: 96
Correcting tempo to milliseconds
Content already exist
Created dir
tempo: 499999, ticks: 1024
Correcting tempo to milliseconds
Created dir
tempo: 689

In [158]:
note_tracks[0]

<midi track '' 15081 messages>

In [96]:
t = midi.tracks[0]
note_df = list()
for msg in t:
    mtype = 'meta'
    time = msg.time
    note = -1
    channel = -1
    if 'note' in msg.type:
        if msg.velocity == 0:
            mtype = 'note_off'
        else:
            mtype = 'note_on'
        note = msg.note
        channel = msg.channel

    note_df += [[mtype, note, time, channel]]
note_df = pd.DataFrame(note_df, columns = ['type', 'note', 'time', 'channel'])
u_channel = note_df['channel'].unique()
u_channel = u_channel[u_channel > -1]

In [164]:
for chan in u_channel:
    # Group by channel
    chan_idx = (note_df['channel'] == chan)
    chan_grping = (chan_idx != chan_idx.shift().fillna(chan_idx[0])).cumsum()
    new_grp = list()
    for i, v in note_df.groupby(chan_grping):
        if v['channel'].iloc[0] == chan:
            for j, (m, n, t, c) in v.iterrows():
                new_grp += [[m, n, t]]
            continue
        t = v['time'].sum()
        new_grp += [['rest', -1, t]]
    if new_grp[0][1] == -1:
        new_grp = new_grp[1:]
    if new_grp[-1][1] == -1:
        new_grp = new_grp[:-1]
    new_grp = pd.DataFrame(new_grp, columns=['type', 'note', 'time'])
    
    # merge metadata as rests
    onrest_comb = list()
    added_rest = 0
    for i, (t, n, d) in new_grp.iterrows():
        if len(onrest_comb) < 1:
            onrest_comb += [[t, n, d]]
            continue

        if t == 'rest':
            added_rest += d
            continue

        if t == 'note_off' and n != onrest_comb[-1][1]:
            added_rest += d
            continue #skip
        if t == 'note_on' and onrest_comb[-1][0] == 'note_on':
            added_rest += d
            continue # skip
        if added_rest:
            d += added_rest
            added_rest = 0

        onrest_comb += [[t, n, d]]
    
    # transform timeframe into note duration
    onrest_comb = pd.DataFrame(onrest_comb, columns=['type', 'note', 'time'])
    onrest_comb.loc[:, 'time'] = onrest_comb['time'].shift(-1).fillna(50).astype(int)
    onrest_comb.loc[onrest_comb['type'] == 'note_on', 'type'] = 'note'
    onrest_comb.loc[onrest_comb['type'] == 'note_off', ['type', 'note']] = ['rest', -1]
    
    # save to midi
    aaapi.MidiUtils.csv_to_midi(
        onrest_comb, 
        'channel{}'.format(chan), 
        duration_column='time')

In [162]:
MidiUtils.csv_to_midi

<function aaapi.MidiUtils.csv_to_midi(df, dest, note_column='note', duration_column='duration', rest_notation=-1)>

In [99]:
round(mido.tick2second(10, ticks, tempo) * 1000)

ValueError: max() arg is an empty sequence

In [74]:
help(mido.tick2second)

Help on function tick2second in module mido.midifiles.units:

tick2second(tick, ticks_per_beat, tempo)
    Convert absolute time in ticks to seconds.
    
    Returns absolute time in seconds for a chosen MIDI file time
    resolution (ticks per beat, also called PPQN or pulses per quarter
    note) and tempo (microseconds per beat).



In [109]:
midi.tracks

[<midi track '' 4 messages>,
 <midi track 'Hypersaw heaven 2 (MIDI)' 247 messages>,
 <midi track 'Deep2 (MIDI)' 763 messages>,
 <midi track 'FPC (MIDI)' 2027 messages>,
 <midi track '4 drawbars (MIDI)' 335 messages>,
 <midi track 'Keys (MIDI)' 931 messages>,
 <midi track 'Keys #2 (MIDI)' 749 messages>]

In [17]:
a = mido.MidiTrack()
a.extend(midi.tracks[0])

In [18]:
a

<midi track '' 4 messages>

['taylor_swift-you_belong_with_me.mid',
 'westlife-flying_without_wings.mid',
 'taylor_swift-begin_again.mid',
 'taylor_swift-shake_it_off.mid',
 'taylor_swift-safe_and_sound_feat_the_civil_wars.mid',
 'bruno_mars-locked_out_of_heaven.mid',
 'taylor_swift-i_knew_you_were_trouble.mid',
 'Backstreet Boys - I Want It That Way.mid',
 'taylor_swift-untouchable.mid',
 'taylor_swift-mine.mid',
 'Westlife_Swear_It_Again.mid',
 'taylor_swift-youre_not_sorry.mid',
 'avril_lavigne-when_youre_gone.mid',
 'WESTLIFE.I lay my love on you.mid',
 'bruno_mars-talking_to_the_moon.mid',
 'bruno_mars-just_the_way_you_are.mid',
 'taylor_swift-red.mid',
 'taylor_swift-back_to_december.mid',
 'avril_lavigne-wish_you_were_here.mid',
 'taylor_swift-love_story.mid',
 'taylor_swift-speak_now.mid',
 'john_legend-all_of_me.mid',
 'ed_sheeran-thinking_out_loud.mid',
 'taylor_swift-white_horse.mid',
 'Britney_Spears_-_You_Drive_Me_Crazy.mid',
 'WESTLIFE.Up town girl K.mid',
 'BRITNEY SPEARS.Lucky K.mid',
 'bruno_mars

In [181]:
transformed = os.listdir(os.path.join(destination_dir))

In [191]:
for i in midi_files:
    if i not in transformed:
        try:
            print("removed", i)
            os.remove(os.path.join(midi_dir, i))
        except FileNotFoundError as e:
            print(i, "has already been removed")

removed evanescence-bring_me_to_life.mid
removed evanescence-heart_shaped_box.mid
removed demi_lovato-give_your_heart_a_break.mid
removed fall_out_boy-beat_it.mid
removed coldplay-the_scientist.mid
removed coldplay-trouble.mid
removed demi_lovato-skyscraper.mid
removed alicia_keys-no_one.mid
removed avril_lavigne-what_the_hell.mid
removed christina_perri-a_thousand_years.mid
removed demi_lovato-everytime_you_lie.mid
removed demi_lovato-heart_attack_piano_version.mid
removed avril_lavigne-unwanted.mid
removed coldplay-speed_of_sound.mid
removed christina_aguilera-car_wash.mid
removed GREENDAY.Good riddance time of your life.mid
removed FOOL'S GARDEN.Lemon tree.mid
removed adele-chasing_pavements_version_2.mid
removed demi_lovato-fix_a_heart.mid
removed celine_dion-my_heart_will_go_on_version_2.mid
removed GREENDAY.In the end.mid
removed evanescence-taking_over_me.mid
removed Can't-Take-My-Eyes-Off-You.mid
removed carly_rae_jepsen-call_me_maybe.mid
