In [17]:
import os
import numpy as np
import scipy
from utils import midi2matrix
import pretty_midi as pyd
from tqdm import tqdm
import sys
sys.path.append('../exported_midi_chord_recognition')
from main import transcribe_cb1000_midi
import scipy.stats as st
ACC = 4

def load_mixture_acc(path, melody_id=None):
    """Load a MIDI file and summarise the mixture of its tracks bar by bar.

    The file is sampled on a grid of ACC steps per beat (linear interpolation
    between the beat times reported by pretty_midi), converted to a matrix by
    `midi2matrix`, and all remaining tracks are merged with an element-wise max.

    Args:
        path: path of the MIDI file to read.
        melody_id: index (or indices) removed along axis 0 of the matrix
            before mixing; None keeps everything.

    Returns:
        (pitch_hist, grooves):
            pitch_hist -- (n_group, 12) sums of the roll values folded into 12
                pitch classes, one row per group of 16 steps.
            grooves -- (n_group, 16) array holding 1 where a step has at least
                one positive entry, else 0.

    NOTE(review): the matrix returned by `midi2matrix` is assumed to be laid
    out as (track, step, pitch); a group of 16 steps is presumably one bar
    (4 beats x ACC=4) -- confirm against `utils.midi2matrix`.
    """
    midi = pyd.PrettyMIDI(path)

    # Add one extrapolated beat so the final beat also has an end time to
    # interpolate towards.
    beat_times = midi.get_beats()
    last_gap = beat_times[-1] - beat_times[-2]
    beat_times = np.append(beat_times, beat_times[-1] + last_gap)
    n_beats = len(beat_times)

    # Beat k sits at step k * ACC; every step in between is placed linearly.
    step_to_time = scipy.interpolate.interp1d(np.arange(n_beats) * ACC, beat_times, kind='linear')
    step_times = step_to_time(np.arange((n_beats - 1) * ACC))

    piano_roll, _ = midi2matrix(midi, step_times)
    # Drop the trailing steps that do not fill a complete group of 16.
    usable_steps = piano_roll.shape[1] // 16 * 16
    piano_roll = piano_roll[:, :usable_steps]
    if melody_id is not None:
        piano_roll = np.delete(piano_roll, melody_id, axis=0)
    mixture = np.max(piano_roll, axis=0)

    # Fold the first 120 pitch columns into 10 x 12 and sum over the 10,
    # then accumulate every 16 consecutive steps.
    per_step_hist = mixture[:, :120].reshape(-1, 10, 12).sum(axis=-2)
    pitch_hist = per_step_hist.reshape(-1, 16, 12).sum(axis=-2)

    # Number of positive entries per step, binarised to a 0/1 pattern.
    grooves = (mixture > 0).sum(axis=-1).reshape(-1, 16)
    grooves[grooves > 0] = 1
    return pitch_hist, grooves

def load_multi_track_acc(path, melody_id=None):
    """Load a MIDI file and summarise each of its tracks bar by bar.

    Same quantisation as the mixture loader (ACC steps per beat, linear
    interpolation between pretty_midi beat times), but the leading axis of the
    matrix returned by `midi2matrix` is kept instead of being merged.

    Args:
        path: path of the MIDI file to read.
        melody_id: index (or indices) removed along axis 0 of the matrix;
            None keeps everything.

    Returns:
        (pitch_hist, grooves):
            pitch_hist -- (n_track, n_group, 12) sums of the roll values folded
                into 12 pitch classes, one row per group of 16 steps.
            grooves -- (n_track, n_group, 16) array holding 1 where a step has
                at least one positive entry, else 0.

    NOTE(review): the matrix returned by `midi2matrix` is assumed to be laid
    out as (track, step, pitch); a group of 16 steps is presumably one bar
    (4 beats x ACC=4) -- confirm against `utils.midi2matrix`.
    """
    midi = pyd.PrettyMIDI(path)

    # Add one extrapolated beat so the final beat also has an end time to
    # interpolate towards.
    beat_times = midi.get_beats()
    last_gap = beat_times[-1] - beat_times[-2]
    beat_times = np.append(beat_times, beat_times[-1] + last_gap)
    n_beats = len(beat_times)

    # Beat k sits at step k * ACC; every step in between is placed linearly.
    step_to_time = scipy.interpolate.interp1d(np.arange(n_beats) * ACC, beat_times, kind='linear')
    step_times = step_to_time(np.arange((n_beats - 1) * ACC))

    roll, _ = midi2matrix(midi, step_times)
    # Drop the trailing steps that do not fill a complete group of 16.
    usable_steps = roll.shape[1] // 16 * 16
    roll = roll[:, :usable_steps]
    if melody_id is not None:
        roll = np.delete(roll, melody_id, axis=0)
    n_track = len(roll)

    # Fold the first 120 pitch columns into 10 x 12 and sum over the 10,
    # then accumulate every 16 consecutive steps -- separately for each track.
    per_step_hist = roll[:, :, :120].reshape(n_track, -1, 10, 12).sum(axis=-2)
    pitch_hist = per_step_hist.reshape(n_track, -1, 16, 12).sum(axis=-2)

    # Number of positive entries per step, binarised to a 0/1 pattern.
    grooves = (roll > 0).sum(axis=-1).reshape(n_track, -1, 16)
    grooves[grooves > 0] = 1
    return pitch_hist, grooves


def pitch_historgam_entropy(histo_mix):
    """Mean Shannon entropy of the per-bar pitch histograms.

    Args:
        histo_mix: array-like of shape (num_bar, 12); each row is one bar's
            pitch-class histogram (it does not need to be normalised,
            `scipy.stats.entropy` normalises each row itself).

    Returns:
        The average of the per-bar entropies (natural logarithm), computed
        over the bars whose histogram does not sum to zero. NaN is returned
        when no such bar exists (no rows at all, or every row sums to zero).
    """
    histo_mix = np.asarray(histo_mix)
    # Bars that sum to zero carry no distribution to measure; leave them out.
    occupied = np.sum(histo_mix, axis=-1) != 0
    bars = histo_mix[occupied]
    if len(bars) == 0:
        # Return NaN explicitly rather than through np.mean([]), which yields
        # the same value only via a "Mean of empty slice" RuntimeWarning.
        return np.nan
    return np.mean([scipy.stats.entropy(bar) for bar in bars])

def groove_consistency(grooves_mix):
    """Mean pairwise rhythmic overlap between the non-empty bars.

    For every ordered pair of bars (i, j), including i == j, the score is
    ``1 - (number of steps where bar_i * bar_j == 0) / 16``; the result is the
    mean of that score over all pairs.

    Args:
        grooves_mix: array-like of shape (num_bar, 16) with the per-step
            onset pattern of each bar (0 = silent step). Values are expected
            to be finite; the loaders in this notebook produce 0/1 integers.

    Returns:
        The mean pair score over the bars whose row does not sum to zero.
        NaN is returned when no such bar exists.
    """
    grooves_mix = np.asarray(grooves_mix)
    # Bars that sum to zero are left out of the comparison.
    bars = grooves_mix[np.sum(grooves_mix, axis=-1) != 0]
    if len(bars) == 0:
        # Return NaN explicitly rather than through np.mean([]), which yields
        # the same value only via a "Mean of empty slice" RuntimeWarning.
        return np.nan

    # shared[i, j] = number of steps at which both bar i and bar j are
    # non-zero, obtained for all pairs at once with one matrix product.
    active = (bars != 0).astype(np.int64)
    shared = active @ active.T
    # A product is zero exactly at the steps that are not shared.
    zero_products = bars.shape[-1] - shared
    scores = 1 - zero_products / 16
    # Flatten in row-major order so the mean runs over the pairs in the
    # same (i, j) order as a nested loop would.
    return np.mean(scores.ravel())


def structure_dynamics(grooves_mix, phrase_seg=[8, 8, 8, 8]):
    """Ratio of within-phrase to cross-phrase groove overlap, averaged over phrases.

    The pair score between bars a and b is
    ``1 - (number of steps where bar_a * bar_b == 0) / 16``.
    For each phrase, the mean score over all ordered pairs of bars inside the
    phrase (a bar paired with itself included) is divided by the mean score
    between the phrase's bars and every bar outside the phrase.

    Args:
        grooves_mix: (num_bar, 16) groove patterns. Bars beyond
            ``sum(phrase_seg)`` are discarded.
        phrase_seg: phrase lengths in bars, in order of appearance.

    Returns:
        The mean of the per-phrase ratios.

    Raises:
        AssertionError: if there are fewer bars than ``sum(phrase_seg)``.
    """
    total_bars = np.sum(phrase_seg)
    if len(grooves_mix) > total_bars:
        grooves_mix = grooves_mix[:total_bars]
    assert len(grooves_mix) == total_bars

    def pair_score(a, b):
        # Share of the 16 steps at which both bars are non-zero.
        return 1 - np.sum((grooves_mix[a] * grooves_mix[b]) == 0) / 16

    n_bars = len(grooves_mix)
    ratios = []
    begin = 0
    for length in phrase_seg:
        end = begin + length
        members = range(begin, end)
        within = [pair_score(i, j) for i in members for j in members]
        across = [
            pair_score(i, j)
            for i in members
            for j in range(n_bars)
            if not (begin <= j < end)
        ]
        ratios.append(np.mean(within) / np.mean(across))
        begin = end
    return np.mean(ratios)


def track_wise_entropy(multi_track):
    """Average of `pitch_historgam_entropy` over the tracks.

    Args:
        multi_track: n_track x num_bar x 12 pitch histograms.

    Returns:
        The mean of the per-track values; it is NaN as soon as one track's
        value is NaN, because a plain mean is used.
    """
    per_track = []
    for track_hist in multi_track:
        per_track.append(pitch_historgam_entropy(track_hist))
    return np.mean(per_track)
def track_wise_consistency(multi_track):
    """Average of `groove_consistency` over the tracks.

    Args:
        multi_track: n_track x num_bar x 16 groove patterns (each track is
            handed to `groove_consistency`, which documents num_bar x 16).

    Returns:
        The mean of the per-track values; it is NaN as soon as one track's
        value is NaN, because a plain mean is used.
    """
    per_track = []
    for track_grooves in multi_track:
        per_track.append(groove_consistency(track_grooves))
    return np.mean(per_track)

def chord_comparator(path, name):
    """Fraction of positions at which two MIDI files carry the same chord label.

    `lead_sheet.mid` and `<name>.mid`, both inside `path`, are transcribed
    with `transcribe_cb1000_midi`. Each transcription is expanded into a flat
    list of labels, and the two lists are compared position by position over
    their common length.

    Args:
        path: directory that contains both MIDI files.
        name: file stem of the MIDI file compared against the lead sheet.

    Returns:
        Number of matching positions divided by the number of compared
        positions.

    Raises:
        ZeroDivisionError: if either expanded label list is empty.
    """

    def expanded_labels(midi_file):
        # NOTE(review): each transcription item is read as
        # (start, end, ..., label), with start/end presumably in seconds --
        # confirm against `transcribe_cb1000_midi`.
        segments = transcribe_cb1000_midi(midi_file, output_path=None)
        # Duration of one beat derived from the FIRST tempo entry only, so a
        # file with tempo changes is treated as constant-tempo.
        beat_len = 60 / pyd.PrettyMIDI(midi_file).get_tempo_changes()[1][0]
        labels = []
        for seg in segments:
            repeats = int(round((seg[1] - seg[0]) / beat_len))
            # Only the part of the label before the first '/' is compared.
            labels.extend([seg[-1].split('/')[0]] * repeats)
        return labels

    chord_1 = expanded_labels(os.path.join(path, 'lead_sheet.mid'))
    chord_2 = expanded_labels(os.path.join(path, f'{name}.mid'))

    # Compare over the common prefix when the two lists differ in length.
    common = min(len(chord_1), len(chord_2))
    matches = sum(a == b for a, b in zip(chord_1[:common], chord_2[:common]))
    return matches / common

# Build {song name: [phrase lengths]} from the tab-separated phrase file.
# Column 0 is the song name. Column 1 is read as alternating single
# characters: every character at an odd index is converted to an int phrase
# length, the characters at even indices are skipped.
with open("../nottingham_database/phrase_cleaned.txt", 'r') as f:
    phrases = f.readlines()

phrase_dict = {}
for line in phrases:
    fields = line.split('\t')
    phrase_dict[fields[0]] = [int(digit) for digit in fields[1][1::2]]

# Evaluate every generated arrangement (3 demo folders x songs x 3 models).
# One list per model and metric; each song appends one value to every list.
MODEL_NAMES = ['AccoMontage3', 'Jianianhua', 'PopMAG']
METRIC_KEYS = ['p_etr', 'g_cst', 'dyn', 'trk_p_etr', 'trk_g_etr', 'chd_acc']
results = {model: {metric: [] for metric in METRIC_KEYS} for model in MODEL_NAMES}

# Index passed as `melody_id` to the loaders, which remove it along axis 0
# of the loaded matrix (-1 = last entry, 0 = first entry).
melody_id_dict = {'AccoMontage3':-1, 'Jianianhua':0, 'PopMAG':0}

for demo in [1, 2, 3]:
    demo_root = f"arrangement/demo_{demo}"
    for song in tqdm(os.listdir(demo_root)):
        phrase = phrase_dict[song]
        song_dir = os.path.join(demo_root, song)
        for model in MODEL_NAMES:
            midi_path = os.path.join(song_dir, f'{model}.mid')
            melody_id = melody_id_dict[model]
            histo_mix, grooves_mix = load_mixture_acc(midi_path, melody_id=melody_id)
            histo_track, grooves_track = load_multi_track_acc(midi_path, melody_id=melody_id)

            scores = results[model]
            scores['p_etr'].append(pitch_historgam_entropy(histo_mix))
            scores['g_cst'].append(groove_consistency(grooves_mix))
            scores['dyn'].append(structure_dynamics(grooves_mix, phrase_seg=phrase))
            scores['trk_p_etr'].append(track_wise_entropy(histo_track))
            # Despite its name, this key stores the track-wise groove consistency.
            scores['trk_g_etr'].append(track_wise_consistency(grooves_track))
            scores['chd_acc'].append(chord_comparator(song_dir, model))

100%|██████████| 66/66 [01:47<00:00,  1.63s/it]
100%|██████████| 66/66 [01:48<00:00,  1.64s/it]
100%|██████████| 66/66 [01:50<00:00,  1.67s/it]


In [18]:
# Per model: mean and half-width of the 95% confidence interval (standard
# error times the two-sided Student-t quantile) of every metric.
columns = [('p-Etr', 'p_etr'), ('g-Cst', 'g_cst'), ('s-dyn', 'dyn'),
           ('trk-p', 'trk_p_etr'), ('trk-g', 'trk_g_etr'), ('chd', 'chd_acc')]
# Columns are separated by a tab followed by 20 spaces.
gap = "\t" + " " * 20
for key in results:
    cells = []
    for label, metric in columns:
        values = results[key][metric]
        mean = np.mean(values)
        half_width = st.sem(values) * scipy.stats.t.ppf((1 + 0.95) / 2., len(values) - 1)
        cells.append(f"{label}={mean:.5f} + {half_width:.5f}")
    print(f"{key}\t " + gap.join(cells))

AccoMontage3	 p-Etr=1.29535 + 0.01522	                    g-Cst=0.52693 + 0.01979	                    s-dyn=1.08076 + 0.00778	                    trk-p=0.76001 + 0.01542	                    trk-g=0.14174 + 0.00485	                    chd=0.72168 + 0.02024
Jianianhua	 p-Etr=1.32391 + 0.02046	                    g-Cst=0.55623 + 0.03252	                    s-dyn=1.05569 + 0.01150	                    trk-p=0.91413 + 0.02130	                    trk-g=0.26791 + 0.01198	                    chd=0.70607 + 0.02170
PopMAG	 p-Etr=1.32200 + 0.01927	                    g-Cst=0.46709 + 0.02251	                    s-dyn=1.05669 + 0.00581	                    trk-p=0.76200 + 0.02252	                    trk-g=0.10504 + 0.00455	                    chd=0.60882 + 0.02045


In [None]:
# Evaluate the reference ("Real") MIDI files found directly in `demo_root`.
# No melody track is removed here, so the loaders use their default
# melody_id=None.
demo_root = "../lmd"
results = {'Real': {'p_etr': [], 'g_cst': [], 'trk_p_etr': [], 'trk_g_etr': []}}
melody_id_dict = {'AccoMontage3':-1, 'Jianianhua':0, 'PopMAG':0}
# (file name, error) for every file that could not be evaluated.
skipped = []
for song in tqdm(os.listdir(demo_root)):
    song_path = os.path.join(demo_root, song)
    try:
        histo_mix, grooves_mix = load_mixture_acc(song_path)
        histo_track, grooves_track = load_multi_track_acc(song_path)
        song_metrics = {
            'p_etr': pitch_historgam_entropy(histo_mix),
            'g_cst': groove_consistency(grooves_mix),
            'trk_p_etr': track_wise_entropy(histo_track),
            'trk_g_etr': track_wise_consistency(grooves_track),
        }
    except Exception as err:
        # Best effort: a file that cannot be processed is skipped, but it is
        # recorded instead of vanishing silently. Catching Exception rather
        # than using a bare `except:` lets KeyboardInterrupt stop the loop.
        skipped.append((song, repr(err)))
        continue
    # Append only once all four metrics exist, so the four lists always stay
    # aligned song by song.
    for metric, value in song_metrics.items():
        results['Real'][metric].append(value)
if skipped:
    print(f"Skipped {len(skipped)} file(s) that could not be evaluated; see `skipped` for details.")


In [20]:
# Per data set: mean and half-width of the 95% confidence interval (standard
# error times the two-sided Student-t quantile) of every metric.
columns = [('p-Etr', 'p_etr'), ('g-Cst', 'g_cst'),
           ('trk-p', 'trk_p_etr'), ('trk-g', 'trk_g_etr')]
# Columns are separated by a tab followed by 20 spaces; the line ends with a tab.
gap = "\t" + " " * 20
for key in results:
    cells = []
    for label, metric in columns:
        values = results[key][metric]
        mean = np.mean(values)
        half_width = st.sem(values) * scipy.stats.t.ppf((1 + 0.95) / 2., len(values) - 1)
        cells.append(f"{label}={mean:.5f} + {half_width:.5f}")
    print(f"{key}\t " + gap.join(cells) + "\t")

Real	 p-Etr=1.55543 + 0.01577	                    g-Cst=0.52725 + 0.01340	                    trk-p=nan + nan	                    trk-g=nan + nan	


In [33]:
# Track-wise pitch entropy of the real data, ignoring the songs whose value
# is NaN. The 'Real' entry is addressed explicitly instead of through a loop
# variable left over from another cell, so this cell does not depend on
# which cell happened to run before it.
new_list = [value for value in results['Real']['trk_p_etr'] if not np.isnan(value)]
print(f"{np.mean(new_list):.5f} + {st.sem(new_list) * scipy.stats.t.ppf((1 + 0.95) / 2., len(new_list)-1):.5f}")

0.90548 + 0.00016


In [34]:
# Track-wise groove consistency of the real data (stored under 'trk_g_etr'),
# ignoring the songs whose value is NaN. The 'Real' entry is addressed
# explicitly instead of through a loop variable left over from another cell,
# so this cell does not depend on which cell happened to run before it.
new_list = [value for value in results['Real']['trk_g_etr'] if not np.isnan(value)]
print(f"{np.mean(new_list):.5f} + {st.sem(new_list) * scipy.stats.t.ppf((1 + 0.95) / 2., len(new_list)-1):.5f}")

0.23359 + 0.00747
