In [57]:
import os
import numpy as np
import scipy
from utils import midi2matrix
import pretty_midi as pyd
from tqdm import tqdm
import scipy.stats as st
# Quantization resolution: number of grid steps each beat is divided into
# when the loader functions build their beat-interpolation grid.
ACC = 4

def piano_mixture_similarity(piano, mixture):
    """Mean per-bar cosine similarity between two bar-wise feature arrays.

    Both inputs are indexed by bar along their first axis; the longer one is
    truncated so that only the bars present in both are compared.

    Args:
        piano: array of shape (num_bar, D).
        mixture: array of shape (num_bar', D).

    Returns:
        Scalar: the cosine similarity of corresponding bars, averaged over
        the shared bars.
    """
    shared_bars = min(len(piano), len(mixture))
    pno = piano[:shared_bars]
    mix = mixture[:shared_bars]

    # Bar-by-bar inner products.
    inner = np.array([np.dot(pno[bar], mix[bar]) for bar in range(pno.shape[0])])
    # Product of the per-bar norms; the small constant avoids division by
    # zero when a bar is empty in either input.
    scale = np.linalg.norm(pno, axis=-1) * np.linalg.norm(mix, axis=-1) + 1e-5

    cosine = inner / scale
    return np.mean(cosine, axis=0)


def degree_of_orchestration(piano, multi_track):
    """Average per-bar entropy of the piano-to-track cosine similarities.

    For every bar, the piano feature vector is compared (cosine similarity)
    with the same bar of each track. The resulting row of per-track
    similarities is passed to ``scipy.stats.entropy`` and the entropies are
    averaged over bars.

    Args:
        piano: array of shape (num_bar, D).
        multi_track: array of shape (n_track, num_bar', D).

    Returns:
        Scalar: mean entropy over the bars present in both inputs.
    """
    shared_bars = min(len(piano), multi_track.shape[1])
    pno = piano[:shared_bars]
    tracks = multi_track[:, :shared_bars]

    # (num_bar, n_track) inner products; the tiny offset keeps a row from
    # being exactly zero when every dot product in it is zero.
    inner = np.array([np.dot(tracks[:, bar], pno[bar]) + 1e-10 for bar in range(len(pno))])

    pno_norm = np.linalg.norm(pno, axis=-1)[:, np.newaxis]              # (num_bar, 1)
    track_norm = np.linalg.norm(tracks.transpose(1, 0, 2), axis=-1)     # (num_bar, n_track)
    similarity = inner / (pno_norm * track_norm + 1e-5)                 # (num_bar, n_track)

    per_bar_entropy = [scipy.stats.entropy(row) for row in similarity]
    return np.mean(per_bar_entropy)

def load_piano_acc(path):
    """Load a piano MIDI file and summarise its accompaniment bar by bar.

    The file is sampled on a grid of ``ACC`` steps per beat via
    ``midi2matrix``. The first track of the resulting array is dropped and
    the remaining tracks are merged into two per-bar descriptors, where one
    bar is 16 grid steps.

    Args:
        path: path of the MIDI file to read.

    Returns:
        Tuple ``(pitch_hist, grooves)``:
            pitch_hist: (num_bar, 12) array. The first 120 entries of the
                last axis are folded into 10 groups of 12 and summed over
                groups, tracks and the 16 steps of each bar.
            grooves: (num_bar, 16) array. For each grid step, the number of
                entries > 0 across tracks and the last axis.
    """
    # Imported explicitly: the file only does ``import scipy``, which is not
    # guaranteed to expose the ``interpolate`` submodule on every SciPy
    # release.
    from scipy.interpolate import interp1d

    midi = pyd.PrettyMIDI(path)
    beats = midi.get_beats()
    # Extrapolate one extra beat (same spacing as the last two) so the final
    # beat can be subdivided as well.
    beats = np.append(beats, beats[-1] + (beats[-1] - beats[-2]))
    # Beat k sits at grid index k * ACC; linear interpolation supplies the
    # time stamps of the steps in between.
    step_to_time = interp1d(np.arange(len(beats)) * ACC, beats, kind='linear')
    step_times = step_to_time(np.arange((len(beats) - 1) * ACC))

    # NOTE(review): assumes midi2matrix returns a (track, step, pitch) array
    # plus one `prog` entry per track -- confirm against utils.midi2matrix.
    rolls, prog = midi2matrix(midi, step_times)
    # Drop trailing steps that do not fill a complete 16-step bar.
    rolls = rolls[:, :rolls.shape[1] // 16 * 16]

    # Track 0 is excluded (presumably the melody -- TODO confirm).
    n_acc = len(prog) - 1
    acc = rolls[1:]

    # (step, 12): fold 120 -> 10 x 12, then sum over tracks and the 10 groups.
    per_step = np.sum(acc[:, :, :120].reshape(n_acc, -1, 10, 12), axis=(0, -2))
    # (num_bar, 12): sum the 16 steps of each bar.
    pitch_hist = np.sum(per_step.reshape(-1, 16, 12), axis=-2)
    # (num_bar, 16): active-entry count per step.
    grooves = np.sum(acc > 0, axis=(0, -1)).reshape(-1, 16)
    return pitch_hist, grooves

def load_multi_track_acc(path, melody_id):
    """Load a multi-track MIDI file and summarise each track bar by bar.

    The file is sampled on a grid of ``ACC`` steps per beat via
    ``midi2matrix``. The track at index ``melody_id`` is removed and every
    remaining track gets its own per-bar descriptors, where one bar is 16
    grid steps.

    Args:
        path: path of the MIDI file to read.
        melody_id: index (along the track axis) of the track to remove
            before computing the descriptors; negative indices count from
            the end.

    Returns:
        Tuple ``(pitch_hist, grooves)``:
            pitch_hist: (n_track, num_bar, 12) array. The first 120 entries
                of the last axis are folded into 10 groups of 12 and summed
                over groups and the 16 steps of each bar.
            grooves: (n_track, num_bar, 16) array. For each track and grid
                step, the number of entries > 0 along the last axis.
        ``n_track`` is ``len(prog) - 1``.
    """
    # Imported explicitly: the file only does ``import scipy``, which is not
    # guaranteed to expose the ``interpolate`` submodule on every SciPy
    # release.
    from scipy.interpolate import interp1d

    midi = pyd.PrettyMIDI(path)
    beats = midi.get_beats()
    # Extrapolate one extra beat (same spacing as the last two) so the final
    # beat can be subdivided as well.
    beats = np.append(beats, beats[-1] + (beats[-1] - beats[-2]))
    # Beat k sits at grid index k * ACC; linear interpolation supplies the
    # time stamps of the steps in between.
    step_to_time = interp1d(np.arange(len(beats)) * ACC, beats, kind='linear')
    step_times = step_to_time(np.arange((len(beats) - 1) * ACC))

    # NOTE(review): assumes midi2matrix returns a (track, step, pitch) array
    # plus one `prog` entry per track -- confirm against utils.midi2matrix.
    rolls, prog = midi2matrix(midi, step_times)
    # Drop trailing steps that do not fill a complete 16-step bar.
    rolls = rolls[:, :rolls.shape[1] // 16 * 16]
    # Remove the melody track so only the remaining tracks are scored.
    rolls = np.delete(rolls, melody_id, axis=0)

    n_acc = len(prog) - 1
    # (n_track, step, 12): fold 120 -> 10 x 12 and sum over the 10 groups.
    per_step = np.sum(rolls[:, :, :120].reshape(n_acc, -1, 10, 12), axis=-2)
    # (n_track, num_bar, 12): sum the 16 steps of each bar.
    pitch_hist = np.sum(per_step.reshape(n_acc, -1, 16, 12), axis=-2)
    # (n_track, num_bar, 16): active-entry count per step.
    grooves = np.sum(rolls > 0, axis=-1).reshape(n_acc, -1, 16)
    return pitch_hist, grooves

# Evaluated systems, mapped to the track index that load_multi_track_acc
# removes before scoring. Insertion order is also the reporting order.
melody_id_dict = {
    'Q&A-XL_blur_0': -1,
    'Q&A-XL_blur_0.5': -1,
    'Q&A-XL_blur_1': -1,
    'Q&A': -1,
    'Arranger-2': 0,
}
# One list of per-song scores for every (system, metric) pair.
results = {
    model: {metric: [] for metric in ('p_sim', 'p_entro', 'g_sim', 'g_entro')}
    for model in melody_id_dict
}
count = 0  # not used anywhere in this cell; kept as existing module state

for demo in (1, 2, 3):
    demo_root = f'../orchestration_with_ablation/demo_{demo}'
    for song in tqdm(os.listdir(demo_root)):
        song_dir = os.path.join(demo_root, song)
        # The piano reference is shared by every system for this song.
        histo_pno, grooves_pno = load_piano_acc(os.path.join(song_dir, 'piano_recon.mid'))
        for model, melody_id in melody_id_dict.items():
            histo_orch, grooves_orch = load_multi_track_acc(
                os.path.join(song_dir, f'{model}.mid'), melody_id=melody_id
            )
            # Similarity is measured against the sum of all tracks; the
            # entropy measure looks at the tracks individually.
            song_scores = {
                'p_sim': piano_mixture_similarity(histo_pno, np.sum(histo_orch, axis=0)),
                'p_entro': degree_of_orchestration(histo_pno, histo_orch),
                'g_sim': piano_mixture_similarity(grooves_pno, np.sum(grooves_orch, axis=0)),
                'g_entro': degree_of_orchestration(grooves_pno, grooves_orch),
            }
            for metric, value in song_scores.items():
                results[model][metric].append(value)

# Report: mean and half-width of the 95% Student-t confidence interval.
report_fields = (('p-Sim', 'p_sim'), ('p-Etr', 'p_entro'), ('g-Sim', 'g_sim'), ('g-Etr', 'g_entro'))
field_sep = '\t' + ' ' * 20
for key in results:
    cells = []
    for label, metric in report_fields:
        values = results[key][metric]
        half_width = st.sem(values) * scipy.stats.t.ppf((1 + 0.95) / 2., len(values)-1)
        cells.append(f"{label}={np.mean(values):.5f} + {half_width:.5f}")
    print(f"{key}\t " + field_sep.join(cells))

100%|██████████| 55/55 [00:18<00:00,  3.04it/s]
100%|██████████| 55/55 [00:17<00:00,  3.12it/s]
100%|██████████| 55/55 [00:17<00:00,  3.17it/s]

Q&A-XL_blur_0	 p-Sim=0.92118 + 0.00809	                    p-Etr=1.63749 + 0.04753	                    g-Sim=0.86462 + 0.00964	                    g-Etr=1.62582 + 0.04782
Q&A-XL_blur_0.5	 p-Sim=0.91967 + 0.00775	                    p-Etr=1.90828 + 0.03603	                    g-Sim=0.80587 + 0.01162	                    g-Etr=1.88433 + 0.03582
Q&A-XL_blur_1	 p-Sim=0.91661 + 0.00792	                    p-Etr=2.02799 + 0.03684	                    g-Sim=0.78441 + 0.01286	                    g-Etr=2.00000 + 0.03614
Q&A	 p-Sim=0.88574 + 0.00988	                    p-Etr=1.63581 + 0.03543	                    g-Sim=0.73230 + 0.01491	                    g-Etr=1.60909 + 0.03615
Arranger-2	 p-Sim=0.98862 + 0.00635	                    p-Etr=0.34147 + 0.04460	                    g-Sim=0.98900 + 0.00637	                    g-Etr=0.33764 + 0.04355





In [60]:
# Re-print the summary table: mean and half-width of the 95% Student-t
# confidence interval for every system and metric.
report_fields = (('p-Sim', 'p_sim'), ('p-Etr', 'p_entro'), ('g-Sim', 'g_sim'), ('g-Etr', 'g_entro'))
field_sep = '\t' + ' ' * 24
for key in results:
    cells = []
    for label, metric in report_fields:
        values = results[key][metric]
        half_width = st.sem(values) * scipy.stats.t.ppf((1 + 0.95) / 2., len(values)-1)
        cells.append(f"{label}={np.mean(values):.5f} + {half_width:.5f}")
    print(f"{key}\t " + field_sep.join(cells))

Q&A-XL_blur_0	 p-Sim=0.92118 + 0.00809	                        p-Etr=1.63749 + 0.04753	                        g-Sim=0.86462 + 0.00964	                        g-Etr=1.62582 + 0.04782
Q&A-XL_blur_0.5	 p-Sim=0.91967 + 0.00775	                        p-Etr=1.90828 + 0.03603	                        g-Sim=0.80587 + 0.01162	                        g-Etr=1.88433 + 0.03582
Q&A-XL_blur_1	 p-Sim=0.91661 + 0.00792	                        p-Etr=2.02799 + 0.03684	                        g-Sim=0.78441 + 0.01286	                        g-Etr=2.00000 + 0.03614
Q&A	 p-Sim=0.88574 + 0.00988	                        p-Etr=1.63581 + 0.03543	                        g-Sim=0.73230 + 0.01491	                        g-Etr=1.60909 + 0.03615
Arranger-2	 p-Sim=0.98862 + 0.00635	                        p-Etr=0.34147 + 0.04460	                        g-Sim=0.98900 + 0.00637	                        g-Etr=0.33764 + 0.04355
