In [6]:
import pickle
import numpy as np
from pypianoroll import Multitrack, Track
import pretty_midi
import shutil
import pypianoroll
# import librosa
import errno
import scipy.sparse
import json
import sys
from os import listdir
from os.path import isfile, join
import scipy.io as sio
import os
from scipy.sparse import csc_matrix

In [10]:
ROOT_PATH = ''
MIDI_DICT_PATH = 'lmd_rock_midi_clean.json'
converter_path = os.path.join(ROOT_PATH, 'lmd_matched_rock_track')
cleaner_path = os.path.join(ROOT_PATH, 'lmd_matched_clean_rock')
FILENAMES = np.load('rock_list.npy')

In [8]:
def get_midi_info(pm):
    """Return useful information from a pretty_midi.PrettyMIDI instance"""
    if pm.time_signature_changes:
        pm.time_signature_changes.sort(key=lambda x: x.time)
        first_beat_time = pm.time_signature_changes[0].time
    else:
        first_beat_time = pm.estimate_beat_start()

    tc_times, tempi = pm.get_tempo_changes()

    if len(pm.time_signature_changes) == 1:
        time_sign = '{}/{}'.format(pm.time_signature_changes[0].numerator,
                                   pm.time_signature_changes[0].denominator)
    else:
        time_sign = None

    midi_info = {
        'first_beat_time': first_beat_time,
        'num_time_signature_change': len(pm.time_signature_changes),
        'time_signature': time_sign,
        'tempo': tempi[0] if len(tc_times) == 1 else None
    }

    return midi_info

def midi_filter(midi_info):
    """Return True for qualified midi files and False for unwanted ones"""
    if midi_info['first_beat_time'] > 0.0:
        return False
    elif midi_info['num_time_signature_change'] > 1:
        return False
    elif midi_info['time_signature'] not in ['4/4']:
        return False
    return True

def get_merged(multitrack):
    """Return a `pypianoroll.Multitrack` instance with piano-rolls merged to
    five tracks (Bass, Drums, Guitar, Piano and Strings)"""
    category_list = {'Bass': [], 'Drums': [], 'Guitar': [], 'Piano': [], 'Strings': []}
    program_dict = {'Piano': 0, 'Drums': 0, 'Guitar': 24, 'Bass': 32, 'Strings': 48}

    for idx, track in enumerate(multitrack.tracks):
        if track.is_drum:
            category_list['Drums'].append(idx)
        elif track.program//8 == 0:
            category_list['Piano'].append(idx)
        elif track.program//8 == 3:
            category_list['Guitar'].append(idx)
        elif track.program//8 == 4:
            category_list['Bass'].append(idx)
        else:
            category_list['Strings'].append(idx)
    tracks = []
    for key in category_list:
        if category_list[key]:
            merged = multitrack[category_list[key]].get_merged_pianoroll()
            tracks.append(Track(merged, program_dict[key], key == 'Drums', key))
        else:
            tracks.append(Track(None, program_dict[key], key == 'Drums', key))
    return Multitrack(None, tracks, multitrack.tempo, multitrack.downbeat, multitrack.beat_resolution, multitrack.name)

def converter(filepath):
    """Save a multi-track piano-roll converted from a MIDI file to target
    dataset directory and update MIDI information to `midi_dict
    The sampling rate is set to 12
    """
    try:
        midi_name = os.path.splitext(os.path.basename(filepath))[0]
        multitrack = Multitrack(beat_resolution=12, name=midi_name)
        tracks = ['Bass', 'Drums', 'Guitar', 'Piano', 'Strings']
        pm = pretty_midi.PrettyMIDI(filepath)
        midi_info = get_midi_info(pm)
        multitrack.parse_pretty_midi(pm)
        merged = get_merged(multitrack)

        make_sure_path_exists(converter_path)
        merged.save(os.path.join(converter_path, midi_name + '.npz'))
        
        return [midi_name, midi_info]

    except:
        return None
    
def make_sure_path_exists(path):
    """Create all intermediate-level directories if the given path does not
    exist"""
    try:
        os.makedirs(path)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

In [11]:
def convert():
    """Main function of the converter"""
    midi_paths = FILENAMES.tolist()
    midi_dict = {}
    kv_pairs = [converter(midi_path) for midi_path in midi_paths]
    for kv_pair in kv_pairs:
        if kv_pair is not None:
            #print kv_pair
            midi_dict[kv_pair[0]] = kv_pair[1]

    with open(os.path.join(ROOT_PATH, 'lmd_midis.json'), 'w') as outfile:
        json.dump(midi_dict, outfile)

    print("[Done] {} files out of {} have been successfully converted".format(len(midi_dict), len(midi_paths)))

    with open(os.path.join(ROOT_PATH, 'lmd_midis.json')) as infile:
        midi_dict = json.load(infile)
    count = 0
    make_sure_path_exists(cleaner_path)
    midi_dict_clean = {}
    for key in midi_dict:
        if midi_filter(midi_dict[key]):
            midi_dict_clean[key] = midi_dict[key]
            count += 1
            shutil.copyfile(os.path.join(converter_path, key + '.npz'),
                            os.path.join(cleaner_path, key + '.npz'))

    with open(os.path.join(ROOT_PATH, MIDI_DICT_PATH), 'w') as outfile:
        json.dump(midi_dict_clean, outfile)

    print("[Done] {} files out of {} have been successfully cleaned".format(count, len(midi_dict)))

convert()

[Done] 666 files out of 700 have been successfully converted
[Done] 666 files out of 666 have been successfully cleaned


### Using json files to generate data 
For each song, reshape it into 48 * 128 and ignore starting time > 0 and time not in 4/4

In [12]:
DATA_SET_ROOT = ''
KIND = 'matched'
SUBSET_LIST = FILENAMES
RESULT_PATH = 'lmd_matched_rock_track'
LAST_BAR_MODE = 'remove'
FILETYPE = 'npz'
from scipy.sparse import csc_matrix

In [13]:
def get_track_to_array(npz_path, idx):
    data = np.load(npz_path)
    indices = data['pianoroll_' + str(idx) + '_csc_indices']
    indptr = data['pianoroll_' + str(idx) + '_csc_indptr']
    d = data['pianoroll_' + str(idx) + '_csc_data']
    matrix = csc_matrix((d, indices, indptr), shape = data['pianoroll_' + str(idx) + '_csc_shape']).toarray()
    return matrix
    
def get_largest_piano_roll(npz_path):
    piano_roll = get_track_to_array(npz_path, 0)
    for idx in range(5):
        piano_roll_sparse = get_track_to_array(npz_path, idx)
        if(piano_roll_sparse.shape[0] > piano_roll.shape[0]):
            piano_roll = piano_roll_sparse
    if int(piano_roll.shape[0]%48) is not 0:
        if LAST_BAR_MODE == 'fill':
            piano_roll = np.concatenate((piano_roll, np.zeros((48-piano_roll.shape[0]%48, 128))), axis=0)
        elif LAST_BAR_MODE == 'remove':
            piano_roll = np.delete(piano_roll, np.s_[-int(piano_roll.shape[0]%48):], axis=0)
    piano_roll = piano_roll.reshape(-1,48,128)
    return piano_roll

def get_bar_piano_roll(npz_path, instrument):
    dict = {'Guitar': 0, 'Piano': 1, 'Bass': 2, 'Drums':3, 'Strings': 4}
    piano_roll = get_track_to_array(npz_path, dict[instrument])
    
    if int(piano_roll.shape[0]%48) is not 0:
        if LAST_BAR_MODE == 'fill':
            piano_roll = np.concatenate((piano_roll, np.zeros((48-piano_roll.shape[0]%48, 128))), axis=0)
        elif LAST_BAR_MODE == 'remove':
            piano_roll = np.delete(piano_roll, np.s_[-int(piano_roll.shape[0]%48):], axis=0)
    piano_roll = piano_roll.reshape(-1,48,128)
    return piano_roll

def save_flat_piano_roll(piano_roll, msd_id, postfix):
    filepath = os.path.join(RESULT_PATH, postfix, msd_id + '.'+ FILETYPE)
    if FILETYPE == 'npz':  # compressed scipy sparse matrix
        piano_roll = piano_roll.reshape(-1,128)
        sparse_train_data = scipy.sparse.csc_matrix(piano_roll)
        scipy.sparse.save_npz(filepath, sparse_train_data)

In [14]:
def seperate_tracks():
    with open(MIDI_DICT_PATH) as f:
            midi_dict = json.load(f)
    subset_id_list = midi_dict.keys()
    counter = 0
    for song_idx in range(len(subset_id_list)):
        
        msd_id = subset_id_list[song_idx]
        npz_path =  cleaner_path + '/' + msd_id + '.npz'
        prefix = ['Bass', 'Drums', 'Guitar', 'Strings', 'Piano']
        flag_act = [0,0,0,0,0]
        
        for p in prefix:
            instr_dir = os.path.join(RESULT_PATH, p)
            if not os.path.exists(instr_dir):
                os.makedirs(instr_dir)

        bar_remplate = get_largest_piano_roll(npz_path).astype(float)

        for idx in range(5):
                piano_roll = np.zeros_like(bar_remplate, dtype=float)
                if(get_bar_piano_roll(npz_path, prefix[idx]).astype(float).shape[0] != 0):
                    piano_roll += get_bar_piano_roll(npz_path, prefix[idx]).astype(float)
                save_flat_piano_roll(piano_roll, msd_id, prefix[idx])
            
        counter += 1
        print('%d/%d' %( counter, song_idx), 'OK!')

seperate_tracks()

1/0 OK!
2/1 OK!
3/2 OK!
4/3 OK!
5/4 OK!
6/5 OK!
7/6 OK!
8/7 OK!
9/8 OK!
10/9 OK!
11/10 OK!
12/11 OK!
13/12 OK!
14/13 OK!
15/14 OK!
16/15 OK!
17/16 OK!
18/17 OK!
19/18 OK!
20/19 OK!
21/20 OK!
22/21 OK!
23/22 OK!
24/23 OK!
25/24 OK!
26/25 OK!
27/26 OK!
28/27 OK!
29/28 OK!
30/29 OK!
31/30 OK!
32/31 OK!
33/32 OK!
34/33 OK!
35/34 OK!
36/35 OK!
37/36 OK!
38/37 OK!
39/38 OK!
40/39 OK!
41/40 OK!
42/41 OK!
43/42 OK!
44/43 OK!
45/44 OK!
46/45 OK!
47/46 OK!
48/47 OK!
49/48 OK!
50/49 OK!
51/50 OK!
52/51 OK!
53/52 OK!
54/53 OK!
55/54 OK!
56/55 OK!
57/56 OK!
58/57 OK!
59/58 OK!
60/59 OK!
61/60 OK!
62/61 OK!
63/62 OK!
64/63 OK!
65/64 OK!
66/65 OK!
67/66 OK!
68/67 OK!
69/68 OK!
70/69 OK!
71/70 OK!
72/71 OK!
73/72 OK!
74/73 OK!
75/74 OK!
76/75 OK!
77/76 OK!
78/77 OK!
79/78 OK!
80/79 OK!
81/80 OK!
82/81 OK!
83/82 OK!
84/83 OK!
85/84 OK!
86/85 OK!
87/86 OK!
88/87 OK!
89/88 OK!
90/89 OK!
91/90 OK!
92/91 OK!
93/92 OK!
94/93 OK!
95/94 OK!
96/95 OK!
97/96 OK!
98/97 OK!
99/98 OK!
100/99 OK!
101/100 OK!
102/10

## Generate bars 
Using the cleaned data to generate bars

In [15]:
ROOT_TRACKS = RESULT_PATH
PATH_PIANO_ROLL = join(ROOT_TRACKS, 'Piano_Roll')
prefix = ['Bass', 'Drums', 'Guitar', 'Strings', 'Piano']
PATH_INSTRU_ACT = join(ROOT_TRACKS, 'act_instr')
PATH_ALL_ACT = join(ROOT_TRACKS, 'act_all')
NUM_SONGS = 500

In [16]:
def csc_to_array(csc):
    return scipy.sparse.csc_matrix((csc['data'], csc['indices'], csc['indptr']), shape= csc['shape']).toarray()
def reshape_to_bar(flat_array):
    return flat_array.reshape(-1,48,128)
def is_empty_bar(bar):
    return not np.sum(bar)

def generate_bars():
    
    if not os.path.exists(PATH_INSTRU_ACT):
        os.makedirs(PATH_INSTRU_ACT)
    if not os.path.exists(PATH_ALL_ACT):
        os.makedirs(PATH_ALL_ACT)
    if not os.path.exists(PATH_PIANO_ROLL):
        os.makedirs(PATH_PIANO_ROLL)
    
    song_list = onlyfiles = [f.split('.')[0] for f in listdir(join(ROOT_TRACKS, 'Drums')) if isfile(join(join(ROOT_TRACKS, 'Drums'), f))]

    thres = 3
    numOfSong = len(song_list)
    for song_idx in range(NUM_SONGS):
        
        msd_id = song_list[song_idx]
        sys.stdout.write('{0}/{1}\r'.format(song_idx, numOfSong))
        sys.stdout.flush()
        
        song_piano_rolls = []
        list_is_empty = []

        piano_roll = reshape_to_bar(csc_to_array(np.load(join(ROOT_TRACKS,prefix[0], msd_id+'.npz'))))
        song_piano_rolls.append(piano_roll)

        for idx in range(1,5):
            piano_roll_tmp = reshape_to_bar(csc_to_array(np.load(join(ROOT_TRACKS,prefix[idx], msd_id+'.npz'))))
            piano_roll += piano_roll_tmp
            song_piano_rolls.append(piano_roll_tmp)

        piano_roll = np.concatenate((piano_roll[:]))
        piano_roll = piano_roll.T

        numOfBar = song_piano_rolls[0].shape[0]
        instr_act = np.zeros((numOfBar,5))
        all_act = np.zeros(numOfBar)
        chroma = np.zeros_like(song_piano_rolls[0])

        for bar_idx in range(numOfBar):
            for pre_idx in range(5):
                bar = song_piano_rolls[pre_idx][bar_idx,:,:]
                instr_act[bar_idx, pre_idx] = not is_empty_bar(bar)
                all_act[bar_idx] = np.sum(instr_act[bar_idx, :]) >= thres

        sio.savemat(os.path.join(PATH_PIANO_ROLL, msd_id+'.mat'), {'piano_roll':piano_roll})
        np.save(join(PATH_INSTRU_ACT, msd_id+'.npy'), instr_act)
        np.save(join(PATH_ALL_ACT, msd_id+'.npy'), all_act)

In [17]:
generate_bars()

499/666