# ICM Final Project

## Music Style Recombination and Interpolation in General

In [1]:
from __future__ import print_function # use the print() function from Python3
import matplotlib.pyplot as plt
import librosa             # The librosa library
import librosa.display
from pylab import *
import IPython.display     # IPython's display module (for in-line audio)
import madmom              # madmom MIR library
import spleeter            # spleeter source separation
from spleeter.separator import Separator
import numpy as np              # numpy  numerical functions
import ffmpeg
SR = 44100                 # default sample rate
HOP_LENGTH = 1024           # default hop length
import torch
import soundfile as sf
import pretty_midi as pm
import os
from operator import add, itemgetter
import math
import random

if not os.path.exists('./demo'):
    os.mkdir('./demo')

### Part 1: Use MIR to extract fundamental frequency, chords, beat and pitch onset from input mp3 files, generate corresponding melody midi and chord midi

In [2]:
def spleeter_separate(file_name, mode="spleeter:2stems", sr=44100):
    mix, _ = librosa.load(file_name, sr=sr)
    separator = Separator(mode)
    predicted_sources = separator.separate(mix[:, None])
    if not os.path.exists(f'{file_name[:-4]}_process_audio'):
        os.mkdir(f'{file_name[:-4]}_processed_audio')
    for key in predicted_sources:
        separated_source = predicted_sources[key][:, 0] # remove the channel dimension
        sf.write(f'{file_name[:-4]}_processed_audio/{file_name[:-4]}_{key}.wav', separated_source, sr)
    print('Vocal successfully extracted')

In [3]:
def pitch_extraction_advanced(file_name, time_threshold=0.2, pitch_threshold=0.5, sample_rate=44100, frame_length=2048):
    # Implement pYIN to obtain f0 in Hz
    wave, _ = librosa.load(file_name, sample_rate)
    hop_length = frame_length//4
    f0, voiced_flag, voiced_probs = librosa.pyin(wave, 
                                                 sr=sample_rate, 
                                                 frame_length=frame_length,
                                                 hop_length= hop_length, 
                                                 fmin=librosa.note_to_hz('C2'), 
                                                 fmax=librosa.note_to_hz('C7'))
    times = librosa.times_like(f0, sr=sample_rate, hop_length= hop_length)
    
    f = open(f'{file_name[:-4]}_f0.txt', 'w')
    for time, freq in zip(times, f0):
        if (not np.isnan(freq)):
            f.write('%f\t%f\n' % (time, freq))
    f.close()
    
    # Separate f0 and times by NaN (array of array)
    f0_nested = np.array([f0[s] for s in np.ma.clump_unmasked(np.ma.masked_invalid(f0))])
    times_nested = np.array([times[s] for s in np.ma.clump_unmasked(np.ma.masked_invalid(f0))])
    
    # Delete the lists that have too little instances (list of array)
    for i in range(len(f0_nested)):
        if times_nested[i][-1] - times_nested[i][0] < time_threshold:
            f0_nested_filtered = np.delete(f0_nested, i, axis=0)
            times_nested_filtered = np.delete(times_nested, i, axis=0)
    try:
        f0_nested_filtered
        times_nested_filtered
    except:
        f0_nested_filtered = f0_nested
        times_nested_filtered = times_nested
        
    f0_nested_filtered = list(f0_nested_filtered)
    times_nested_filtered = list(times_nested_filtered)

    
    # Wash the lists based on thresholds
    pitch_index_nested = []
    time_nested_new = []
    for list_idx in range(len(f0_nested_filtered)):
        pitch_index_list = []
        time_list_new = []
        for i in range(len(f0_nested_filtered[list_idx])):
            if i == 0:
                pitch_index_list.append(i)
                time_list_new.append(times_nested_filtered[list_idx][i])
                
            else:
                current = librosa.hz_to_midi(f0_nested_filtered[list_idx][i])
                previous = librosa.hz_to_midi(f0_nested_filtered[list_idx][i-1])
                if abs(current-previous) > pitch_threshold and times_nested_filtered[list_idx][i] - time_list_new[-1] > time_threshold:
                    pitch_index_list.append(i)
                    time_list_new.append(times_nested_filtered[list_idx][i])
                    
        time_list_new.append(times_nested_filtered[list_idx][-1])
        pitch_index_list = np.array(pitch_index_list)
        pitch_index_nested.append(pitch_index_list)
        
        time_list_new = np.array(time_list_new)
        time_nested_new.append(time_list_new)
            
    # Convert the index to pitch (by taking average)
    pitch_list = []
    for list_idx in range(len(pitch_index_nested)):
        pitch = []
        for i in range(len(pitch_index_nested[list_idx])):
            try:
                freq_group = f0_nested_filtered[list_idx][pitch_index_nested[list_idx][i]:pitch_index_nested[list_idx][i+1]]
            except:
                freq_group = f0_nested_filtered[list_idx][pitch_index_nested[list_idx][i]:-1]
            
            if list(freq_group) == []:
                freq_group = [f0_nested_filtered[list_idx][pitch_index_nested[list_idx][i]]]
            freq_avg = np.mean(freq_group)
            pitch_avg = round(librosa.hz_to_midi(freq_avg))
            pitch.append(pitch_avg)
        pitch = np.array(pitch)
        pitch_list.append(pitch)

    # Generate
    music = pm.PrettyMIDI(initial_tempo=75)
    piano = pm.Instrument(program=1)
    for pitch_array, time_array in zip(pitch_list, time_nested_new):
        for i in range(len(pitch_array)):
            pitch = pitch_array[i]
            start = time_array[i]
            end = time_array[i+1]
            if start == end:
                end = start + max(time_threshold/5, 0.2)
                
            note = pm.Note(velocity=100, pitch=pitch, start=start, end=end)
            piano.notes.append(note)
    music.instruments.append(piano)
    music.write(f'{file_name[:-4]}_extracted_pitch.mid')
    print('Pitch extraction successful')

In [4]:
from madmom.features import DBNDownBeatTrackingProcessor, RNNDownBeatProcessor, CNNChordFeatureProcessor, CRFChordRecognitionProcessor, PeakPickingProcessor, NotePeakPickingProcessor, RNNPianoNoteProcessor
from madmom.processors import ParallelProcessor, Processor, SequentialProcessor

In [5]:
#Extract chords from the audio
in_processor = CNNChordFeatureProcessor()
chord_processor = CRFChordRecognitionProcessor()

def get_chord(fn):
    data = in_processor.process(fn)
    chords = chord_processor.process(data)
    f = open(f'{fn[:-4]}_processed_audio/{fn[:-4]}_chords.txt', 'w')
    for line in chords:
        f.write('%f\t%f\t%s\n' % (line[0], line[1], line[2]))
    f.close()
    return chords

In [6]:
def melody_and_chords_extraction(fn):
    spleeter_separate(fn)
    pitch_extraction_advanced(f'{fn[:-4]}_processed_audio/{fn[:-4]}_vocals.wav', time_threshold=0.2, pitch_threshold=0.5, sample_rate=44100, frame_length=2048)
    chords = get_chord(fn)
    melody_name = f'{fn[:-4]}_processed_audio/{fn[:-4]}_vocals_extracted_pitch.mid'
    
    return melody_name, chords

### Part 2: Quantize generated melody midi and chord midi and transform them into list of two-measure notes

In [7]:
from ec2vae.model import EC2VAE
# initialize the model
ec2vae_model = EC2VAE.init_model()

# load model parameter
ec2vae_param_path = './ec2vae/model_param/ec2vae-v1.pt'
ec2vae_model.load_model(ec2vae_param_path)

In [8]:
# input the time in seconds, output the subbeat index of range 0 .. len(keypoints) - 1
def quantize(keypoint_boundaries, time):
    return np.searchsorted(keypoint_boundaries, time)

def get_quantized_notes_and_chords(name_of_melody_file, chords):
    #get beat
    print(len(chords))
    midi_data = pm.PrettyMIDI(name_of_melody_file)
    display(midi_data.instruments)
    beats = midi_data.get_beats()
    print(beats[:10])
    beat_id = np.arange(len(beats))
    subbeat_id = np.arange(len(beats) * 4) / 4.0
    keypoints = np.interp(subbeat_id, beat_id, beats)
    keypoint_boundaries = (keypoints[1:] + keypoints[:-1]) / 2
    #melody quantization
    quantized_notes = [{
    'start_quantized': quantize(keypoint_boundaries, note.start),
    'end_quantized': quantize(keypoint_boundaries, note.end),
    'pitch': note.pitch,
    'velocity' : note.velocity,
    } for note in midi_data.instruments[0].notes]
    
    #chord quantization
    quantized_chords = [{
    'start_quantized': quantize(keypoint_boundaries, float(chord[0])),
    'end_quantized': quantize(keypoint_boundaries, float(chord[1])),
    'chord': chord[2],
    'velocity' : 100,
    } for chord in chords]

    return quantized_notes, quantized_chords

In [9]:
# Sample chords.
chord_dic = {}
chord_dic['A:maj'] = [0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]
chord_dic['A:min'] = [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]
chord_dic['A#:maj'] = [0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]
chord_dic['A#:min'] = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0]
chord_dic['B:maj'] = [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1]
chord_dic['B:min'] = [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1]
chord_dic['C:maj'] = [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
chord_dic['C:min'] = [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0]
chord_dic['C#:maj'] = [0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0]
chord_dic['C#:min'] = [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0]
chord_dic['D:maj'] = [0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0]
chord_dic['D:min'] = [0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]
chord_dic['D#:maj'] = [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0]
chord_dic['D#:min'] = [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0]
chord_dic['E:maj'] = [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1]
chord_dic['E:min'] = [0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1]
chord_dic['F:maj'] = [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
chord_dic['F:min'] = [1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0]
chord_dic['F#:maj'] = [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0]
chord_dic['F#:min'] = [0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0]
chord_dic['G:maj'] = [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1]
chord_dic['G:min'] = [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0]
chord_dic['G#:maj'] = [1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0]
chord_dic['G#:min'] = [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1]
chord_dic['N'] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [10]:
def get_two_measures_list(quantized_notes, quantized_chords):
    #Generate List of two measures notes
    print(len(quantized_notes), len(quantized_chords))
    score = []
    last_end = 0
    for note in quantized_notes:
        if note['start_quantized'] > last_end:
            for i in range(note['start_quantized']-last_end):
                score.append(129)
        duration = note['end_quantized'] - note['start_quantized']
        for i in range(duration):
            if i == 0:
                score.append(note['pitch'])
            else:
                score.append(128)
            
        last_end = note['end_quantized']
    
    l_two_measures = []
    n = 0
    while 32*(n+1) < len(score):
        l_two_measures.append(np.array(score[32*n:32*(n+1)]))
        n += 1  
    last_two_measure = score[32*n:]
    while len(last_two_measure) < 32:
        last_two_measure.append(129)
    
    l_two_measures.append(np.array(last_two_measure))
    
    #Generate List of two measures chords
    chord_score = []
    last_end = quantized_chords[0]['start_quantized']
    for chord in quantized_chords:
        if chord['start_quantized'] > last_end:
            for i in range(chord['start_quantized']-last_end):
                chord_score.append(chord_dic['N'])
        duration = chord['end_quantized'] - chord['start_quantized']
        for i in range(duration):
            chord_score.append(chord_dic[chord['chord']])
            
        last_end = chord['end_quantized']
    
    l_two_measure_chords = []
    n = 0
    while 32*(n+1) < len(chord_score):
        l_two_measure_chords.append(np.array(chord_score[32*n:32*(n+1)]))
        n += 1
    
    last_two_measure_chords = chord_score[32*n:]
    while len(last_two_measure_chords) < 32:
        last_two_measure_chords.append(chord_dic['N'])
    
    l_two_measure_chords.append(np.array(last_two_measure_chords))
    
    while len(l_two_measure_chords) < len(l_two_measures):
        l_two_measure_chords.append(np.array([chord_dic['N']]*32))
        
    
    return l_two_measures, l_two_measure_chords

In [11]:
#Turn note arrays into one-hot vectors
def note_array_to_onehot(note_array):
    pr = np.zeros((len(note_array), 130))
    pr[np.arange(0, len(note_array)), note_array.astype(int)] = 1.
    return pr

### Part 3: Style Recombination and Interpolation, Generate Raw Piece

In [12]:
def get_z_list(l_two_measures, l_two_measure_chords):
    pr_list = [note_array_to_onehot(x) for x in l_two_measures]
    #Convert to pytorch tensors, and to cuda/cpu
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Concert
    pr_list = [torch.from_numpy(pr).float().to(device).unsqueeze(0) for pr in pr_list]
    #chord list
    l_c = [torch.from_numpy(c).float().to(device).unsqueeze(0) for c in l_two_measure_chords]
    # encode list of two-measure melodies and chords
    z_list = []
    for i in range(len(pr_list)):
        zp, zr = ec2vae_model.encoder(pr_list[i], l_c[i])
        z_list.append([zp, zr])
        
    return z_list, l_c

In [13]:
def generate_midi_with_melody_chord(fn, mel_notes, c_notes, tempo):
    midi = pm.PrettyMIDI(initial_tempo = tempo)
    ins1 = pm.Instrument(0)
    ins1.notes = mel_notes
    ins2 = pm.Instrument(0)
    ins2.notes = c_notes
    midi.instruments.append(ins1)
    midi.instruments.append(ins2)
    midi.write(fn)

In [14]:
def generate_raw_piece(z_list_pitch, z_list_rhythm, l_c, beats, fn):
    # Use pitch of the piece and rhythm of another piece
    l_new_rhythm = []
    for i in range(len(z_list_pitch)):
        if i > len(z_list_rhythm) - 1:
            pred_piece = ec2vae_model.decoder(z_list_pitch[i][0], z_list_rhythm[i%len(z_list_rhythm)][1], l_c[i])
        else:
            pred_piece = ec2vae_model.decoder(z_list_pitch[i][0], z_list_rhythm[i][1], l_c[i])
        l_new_rhythm.append(pred_piece)
    
    out_list = []
    for i in l_new_rhythm:
        out_new = i.squeeze(0).cpu().numpy()
        out_list.append(out_new)
        
    notes_list = []
    for i in range(len(out_list)):
        notes = ec2vae_model.__class__.note_array_to_notes(out_list[i], bpm=60/beats, start=8*beats*i)
        notes_list.extend(notes)
        
    all_notes_l_c = []   
    #notes_l_c = [ec2vae_model.__class__.chord_to_notes(c.squeeze(0).cpu().numpy(), 60/beats, 0) for c in l_c]
    notes_l_c = [ec2vae_model.__class__.chord_to_notes(l_c[a].squeeze(0).cpu().numpy(), 60/beats, 8*beats*a) for a in range(len(l_c))]
    for i in notes_l_c:
        all_notes_l_c.extend(i)
    
    #for i in range(len(out_list)):
        #for note in notes_l_c[i]:
            #all_notes_l_c.append(pm.Note(start=note.start + 8*beats*i, end=note.end + 8*beats*i, pitch=note.pitch, velocity=note.velocity))

    generate_midi_with_melody_chord(fn, notes_list, all_notes_l_c, tempo = 60/beats)
    

In [15]:
def from_piece_to_result(pitch_files, pitch_files_chords, rhythm_files, rhythm_files_chords, fn, weight=1):
    midi_data_list = [pm.PrettyMIDI(pitch_file) for pitch_file in pitch_files]
    beats = [midi_data.get_beats()[1] for midi_data in midi_data_list]
    quantized_notes_pitch_list = [get_quantized_notes_and_chords(pitch_files[i], pitch_files_chords[i])[0] for i in range(len(pitch_files))]
    quantized_chords_pitch_list = [get_quantized_notes_and_chords(pitch_files[i], pitch_files_chords[i])[1] for i in range(len(pitch_files))]

    quantized_notes_rhythm_list = [get_quantized_notes_and_chords(rhythm_files[i], rhythm_files_chords[i])[0] for i in range(len(rhythm_files))]
    quantized_chords_rhythm_list = [get_quantized_notes_and_chords(rhythm_files[i], rhythm_files_chords[i])[1] for i in range(len(rhythm_files))]
    
    l_two_measures_pitch_1ist = [get_two_measures_list(quantized_notes_pitch_list[i], quantized_chords_pitch_list[i])[0] for i in range(len(quantized_notes_pitch_list))]
    l_two_measures_chords_pitch_list = [get_two_measures_list(quantized_notes_pitch_list[i], quantized_chords_pitch_list[i])[1] for i in range(len(quantized_notes_pitch_list))]
    
    l_two_measures_rhythm_1ist = [get_two_measures_list(quantized_notes_rhythm_list[i], quantized_chords_rhythm_list[i])[0] for i in range(len(quantized_notes_rhythm_list))]
    l_two_measures_chords_rhythm_list = [get_two_measures_list(quantized_notes_rhythm_list[i], quantized_chords_rhythm_list[i])[1] for i in range(len(quantized_notes_rhythm_list))]
    
    
    
    z_big_pitch_list = [get_z_list(l_two_measures_pitch_1ist[i], l_two_measures_chords_pitch_list[i])[0] for i in range(len(l_two_measures_pitch_1ist))]
            
    k = weight
    if len(z_big_pitch_list) > 1:
        for i in range(len(z_big_pitch_list[0])):
            z_big_pitch_list[0][i][0] = z_big_pitch_list[0][i][0]*k
            z_big_pitch_list[0][i][1] = z_big_pitch_list[0][i][1]*k
    
            for j in range(1, len(z_big_pitch_list)):
                if i < len(z_big_pitch_list[j]):
                    z_big_pitch_list[0][i][0] += z_big_pitch_list[j][i][0]*(1-k)/(len(z_big_pitch_list)-1)
                    z_big_pitch_list[0][i][1] += z_big_pitch_list[j][i][1]*(1-k)/(len(z_big_pitch_list)-1)
                    
                else:
                    break
                    
    
                                                    
    l_c = get_z_list(l_two_measures_pitch_1ist[0], l_two_measures_chords_pitch_list[0])[1]
                                                              
    z_big_rhythm_list = [get_z_list(l_two_measures_rhythm_1ist[i], l_two_measures_chords_rhythm_list[i])[0] for i in range(len(l_two_measures_rhythm_1ist))]
    
    if len(z_big_rhythm_list) > 1:
        for i in range(len(z_big_rhythm_list[0])):
            z_big_rhythm_list[0][i][0] = z_big_rhythm_list[0][i][0]*k
            z_big_rhythm_list[0][i][1] = z_big_rhythm_list[0][i][1]*k
    
            for j in range(1, len(z_big_rhythm_list)):
                if i < len(z_big_rhythm_list[j]):
                    z_big_rhythm_list[0][i][0] += z_big_rhythm_list[j][i][0]*(1-k)/(len(z_big_rhythm_list)-1)
                    z_big_rhythm_list[0][i][1] += z_big_rhythm_list[j][i][1]*(1-k)/(len(z_big_rhythm_list)-1)
                    
                else:
                    break  
    
    generate_raw_piece(z_big_pitch_list[0], z_big_rhythm_list[0], l_c, beats[0], fn)

In [16]:
#formalize melody
f4 = open('chords4.txt', 'r')
chords4 = []
content = f4.readlines()
for i in content:
    chords4.append(i.split())
f4.close()
sample_midi = 'POP909_004_004.mid'
sample_chord = chords4

def formalize_melody(raw_midi, chord1, sample_midi, chord2):
    from_piece_to_result([raw_midi], [chord1], [sample_midi], [chord2], f'{raw_midi[:-4]}_formalized.mid')
    return f'{raw_midi[:-4]}_formalized.mid'

### Part 4: Midi-Level Composition

In [17]:
#Transformation of chords 1: Keep striking
def keep_striking(midi_data, quantized_chord_notes, beats):
    new_notes = []
    for note in quantized_chord_notes:
        for i in range(note['end_quantized']-note['start_quantized']):
            new_notes.append(pm.Note(start=(note['start_quantized'] + i)*beats[1]/4, end=(note['start_quantized'] + i + 1)*beats[1]/4, pitch=note['pitch'], velocity=70 if i == 0 else 50))
        
    midi_data.instruments[1].notes = new_notes

In [18]:
#Transformation of melody 1: Canon
def canon(midi_data, quantized_melody_notes, beats):
    for note in quantized_melody_notes:
        midi_data.instruments[0].notes.append(pm.Note(start=(note['start_quantized'] + 4)*beats[1]/4, end=(note['end_quantized'] + 4)*beats[1]/4, pitch=note['pitch'], velocity=70))

In [19]:
#Transformation of melody 2: Note Interpolation
def notes_interpolation(midi_data, quantized_melody_notes, beats):
    for i in range(len(quantized_melody_notes)-1):
        distance = quantized_melody_notes[i + 1]['start_quantized'] - quantized_melody_notes[i]['start_quantized']
        if distance > 5 and distance <= 20:
            pitch1 = quantized_melody_notes[i]
            pitch2 = quantized_melody_notes[i+1]
            while distance > 2:
                midi_data.instruments[0].notes.append(pm.Note(start=(pitch1['start_quantized'] + distance // 2)*beats[1]/4, end=(pitch1['end_quantized'] + distance // 2)*beats[1]/4, pitch=(pitch1['pitch'] + pitch2['pitch'])//2, velocity=90))
                pitch1 = quantized_melody_notes[-1]
                distance = distance // 2

In [20]:
#Midi level Reconstruction
def midi_composition(fn, mode = ""):
    midi_data = pm.PrettyMIDI(fn)
    beats = midi_data.get_beats()
    beat_id = np.arange(len(beats))
    subbeat_id = np.arange(len(beats) * 4) / 4.0
    keypoints = np.interp(subbeat_id, beat_id, beats)
    keypoint_boundaries = (keypoints[1:] + keypoints[:-1]) / 2
    
    #chord note quantization
    quantized_chord_notes = [{
    'start_quantized': quantize(keypoint_boundaries, note.start),
    'end_quantized': quantize(keypoint_boundaries, note.end),
    'pitch': note.pitch,
    'velocity' : note.velocity,
    } for note in midi_data.instruments[1].notes]
    
    #melody note quantization
    quantized_melody_notes = [{
    'start_quantized': quantize(keypoint_boundaries, note.start),
    'end_quantized': quantize(keypoint_boundaries, note.end),
    'pitch': note.pitch,
    'velocity' : note.velocity,
    } for note in midi_data.instruments[0].notes]
    
    mode_list = mode.split()
    if 'K' in mode_list:
        keep_striking(midi_data, quantized_chord_notes, beats)
        
    if 'C' in mode_list:
        canon(midi_data, quantized_melody_notes, beats)
        
    if 'I' in mode_list:
        notes_interpolation(midi_data, quantized_melody_notes, beats)
        
    midi_data.write(fn)

### Part 5: Wave-Level Synthesis

In [21]:
def midi_to_freq(midi_fn):
    midi_file = pm.PrettyMIDI(midi_fn)
    all_notes = []
    end_times = []
    for track in midi_file.instruments:
        track_notes = np.zeros((len(track.notes), 4)) #[start, end, f, velo]
        for i in range(len(track.notes)):
            track_notes[i][0] = track.notes[i].start
            track_notes[i][1] = track.notes[i].end
            track_notes[i][2] = librosa.midi_to_hz(track.notes[i].pitch)
            track_notes[i][3] = track.notes[i].velocity
        end_time = track_notes[i][1]
        end_times.append(end_time)
        
        # Sort the track notes array by start time (for chord track)
        track_notes = np.array(sorted(track_notes, key=itemgetter(0)))
        
        # Separate the three notes if the track contains the chord
        if track_notes[0][0] == track_notes[1][0] and track_notes[1][0] == track_notes[2][0]: # Determine whether the track is chord by the start time of the first three notes
            sub_track_1 = np.array([track_notes[i*3] for i in range(len(track_notes)//3)])
            sub_track_2 = np.array([track_notes[i*3+1] for i in range(len(track_notes)//3)])
            sub_track_3 = np.array([track_notes[i*3+2] for i in range(len(track_notes)//3)])
            track_notes = [sub_track_1, sub_track_2, sub_track_3]
        all_notes.append(track_notes)
        beat_length = midi_file.get_beats()[1]
    return all_notes, beat_length, end_times

In [22]:
def cos_wave(A, f, t_start, t_end, overhang_depth, fm_depth, fm_freq, sr=44100):
    """
    Generate original sound wave. FM is implemented in the function, 
    AM is used to avoid click

    Parameters
    ----------
    A : TYPE Integer
        DESCRIPTION. The amplitude of the wave
    f : TYPE Float
        DESCRIPTION. The frequency of the wave
    t : TYPE Float
        DESCRIPTION. The duration of the sound
    overhang_depth : TYPE Float
        DESCRIPTION. The FM systhesis depth for overhang generation
    fm_depth : TYPE Float
        DESCRIPTION. The FM synthesis depth for vibrato
    fm_freq : TYPE Float
        DESCRIPTION. The FM synthesis frequency for vibrato
    sr : TYPE Integer, optional
        DESCRIPTION. The sample rate. The default is 44100.

    Returns
    -------
    numpy.ndarray
        DESCRIPTION. The original wave (only with FM and AM)

    """
    a = [A * cos(2*pi*f*x + overhang_depth*cos(2*pi*f*x) + fm_depth*cos(2*pi*fm_freq*x))\
     for x in arange(0,t_end-t_start,1./sr)]
    window = np.hanning(len(a))
    #window[:1000] = [exp(x)-e+1 for x in arange(0, 1, 1/1000)]
    #window[-1000:] = [exp(-1*x) for x in arange(0, 1, 1/1000)]
    result = np.array(a)*window
    return result

In [23]:
def piano_sound(A, f, t_start, t_end, sr=44100):
    Y = np.array([sin(2 * pi * f * x) * exp(-0.0004 * 2 * pi * f * x) for x in arange(0,t_end-t_start,1./sr)])
    Y += [sin(2 * 2 * pi * f * x) * exp(-0.0004 * 2 * pi * f * x) / 2 for x in arange(0,t_end-t_start,1./sr)]
    Y += [sin(3 * 2 * pi * f * x) * exp(-0.0004 * 2 * pi * f * x) / 4 for x in arange(0,t_end-t_start,1./sr)]
    Y += [sin(4 * 2 * pi * f * x) * exp(-0.0004 * 2 * pi * f * x) / 8 for x in arange(0,t_end-t_start,1./sr)]
    Y += [sin(5 * 2 * pi * f * x) * exp(-0.0004 * 2 * pi * f * x) / 16 for x in arange(0,t_end-t_start,1./sr)]
    Y += [sin(6 * 2 * pi * f * x) * exp(-0.0004 * 2 * pi * f * x) / 32 for x in arange(0,t_end-t_start,1./sr)]
    window = np.hanning(len(Y))
    #window[:1000] = np.array([exp(x)-e+1 for x in arange(0, 1, 1/1000)])
    #window[-1000:] = np.array([exp(-x) for x in arange(0, 1, 1/1000)])
    result = Y*window
    return result

In [24]:
def generate_piece(note_list, end_time, sr = 44100):
    all_notes_wav = np.zeros((int(end_time*sr)))
    for track in note_list:
        if type(track)==list:
            for sub_track in track:
                for note in sub_track:
                    note_wav = piano_sound(note[-1], note[2], note[0], note[1], sr)
                    try:
                        all_notes_wav[int(note[0]*sr):int(note[1]*sr)] += note_wav
                    except:
                        all_notes_wav[int(note[0]*sr):int(note[1]*sr)] += note_wav[:-1]
        else:
            for note in track:
                note_wav = piano_sound(note[-1], note[2], note[0], note[1], sr)
                try:
                    all_notes_wav[int(note[0]*sr):int(note[1]*sr)] += note_wav
                except:
                    all_notes_wav[int(note[0]*sr):int(note[1]*sr)] += note_wav[:-1]
    #sf.write(wav_fn, all_notes_wav, samplerate=sr)
    return all_notes_wav

In [25]:
def separate_chords(chord_note_list, unit_time):
    separate_note_list = []
    b = 0
    for i in range(len(chord_note_list[0])):
        note_number = round((chord_note_list[0][i][1]-chord_note_list[0][i][0]), 5)//round(unit_time, 5)
        #print(note_number)
        a = 0
        while a < note_number:
            ind = random.randint(0, 2)
            if chord_note_list[ind][i][2] != b:
                b = chord_note_list[ind][i][2]
                separate_note_list.append(np.array([round(chord_note_list[0][i][0]+a*unit_time, 4), round(chord_note_list[0][i][0]+(a+1)*unit_time, 4),\
                                                   chord_note_list[ind][i][2], chord_note_list[ind][i][-1]]))
                a += 1
        try:
            separate_note_list.append(np.array([round(chord_note_list[0][i][0]+(a)*unit_time, 4), round(chord_note_list[0][i+1][0], 4),\
                                                chord_note_list[0][i][2], chord_note_list[0][i][-1]]))
        except:
            pass
    return np.array(separate_note_list)

In [26]:
def midi_to_wav(midi_fn, chord_separation=False, sr = 44100):
    all_notes, beat_length, end_times = midi_to_freq(midi_fn)
    if chord_separation == False:
        notes_for_chord = [all_notes[1]]
    else:
        separated_chord = separate_chords(all_notes[1], beat_length/4)
        notes_for_chord = [separated_chord]
    
    melody = generate_piece([all_notes[0]], end_times[0], sr)
    chord = generate_piece(notes_for_chord, end_times[1], sr)
    
    if len(melody) != len(chord):
        melody_new = np.zeros(max(len(melody), len(chord)))
        chord_new = np.zeros(max(len(melody), len(chord)))
        melody_new[:len(melody)] += melody
        chord_new[:len(chord)] += chord
        melody = melody_new
        chord = chord_new
        del melody_new, chord_new
        
    return melody, chord

### User Interface

In [None]:
from tkinter import *
from tkinter import ttk
from tkinter.filedialog import askopenfilename

class MusicStyleSynthesizer:

    def __init__(self, root):

        root.title("Music Style Synthesizer")
        root.geometry('1068x381+10+10')
        mainframe = ttk.Frame(root, padding="3 3 12 12")
        mainframe.grid(column=0, row=0, sticky=(N, W, E, S))
        root.columnconfigure(0, weight=1)
        root.rowconfigure(0, weight=1)
       
        self.feet = StringVar()
        feet_entry = ttk.Entry(mainframe, width=7, textvariable=self.feet)
        feet_entry.grid(column=2, row=1, sticky=(W, E))
        
        self.feet2 = StringVar()
        feet_entry = ttk.Entry(mainframe, width=7, textvariable=self.feet2)
        feet_entry.grid(column=4, row=1, sticky=(W, E))
        
        self.feet3 = StringVar()
        feet_entry = ttk.Entry(mainframe, width=7, textvariable=self.feet3)
        feet_entry.grid(column=6, row=1, sticky=(W, E))
        
        self.feet4 = StringVar()
        feet_entry = ttk.Entry(mainframe, width=7, textvariable=self.feet4)
        feet_entry.grid(column=2, row=2, sticky=(W, E))
        
        self.feet5 = StringVar()
        feet_entry = ttk.Entry(mainframe, width=7, textvariable=self.feet5)
        feet_entry.grid(column=2, row=3, sticky=(W, E))
        
        self.meters = StringVar()

        ttk.Label(mainframe, textvariable=self.meters).grid(column=4, row=3, sticky=(W, E))
        ttk.Button(mainframe, text="Generate", command=self.generate).grid(column=2, row=4, sticky=W)

        ttk.Label(mainframe, text="Pitch Input Files").grid(column=1, row=1, sticky=W)
        ttk.Label(mainframe, text="Rhythm Input Files").grid(column=3, row=1, sticky=W)
        ttk.Label(mainframe, text="Weight for Major File").grid(column=5, row=1, sticky=W)
        ttk.Label(mainframe, text="Midi Composition Mode").grid(column=1, row=2, sticky=E)
        ttk.Label(mainframe, text="K:Keep Striking, C:Canon, I:Pitch Interpolation").grid(column=4, row=2, sticky=W)
        ttk.Label(mainframe, text="Chords Decomposition").grid(column=1, row=3, sticky=W)
        ttk.Label(mainframe, text="Enter: Yes/No").grid(column=4, row=3, sticky=W)
        #ttk.Label(mainframe, text="For filenames, please enter space before you enter the next filename").grid(column=1, row=7, sticky=W)
        #ttk.Label(mainframe, text="Major file is defined as the first file entered, the weight k will be assigned to this file, and the remaining weight will be distributed equally among other files").grid(column=1, row=7, sticky=W)
        
        for child in mainframe.winfo_children(): 
            child.grid_configure(padx=5, pady=5)

        feet_entry.focus()
        root.bind("<Return>", self.generate)
        
    def generate(self, *args):
        try:
            value1 = str(self.feet.get())
            value2 = str(self.feet2.get())
            k = float(self.feet3.get())
            if k < 0 or k > 1:
                self.meters.set("Invalid Weight")
                return
                
            mode = str(self.feet4.get())
            chord_decomp = str(self.feet5.get())
            
            self.meters.set("Begin Processing")
            
            list1 = [filename + '.wav' for filename in value1.split()]
            list2 = [filename + '.wav' for filename in value2.split()]
            
            midi_input_l1 = []
            midi_input_l2 = []
            chords1 = []
            chords2 = []
            for fn in list1:
                temp = melody_and_chords_extraction(fn)
                midi_input_l1.append(temp[0])
                chords1.append(temp[1])
                
            for fn in list2:
                temp = melody_and_chords_extraction(fn)
                midi_input_l2.append(temp[0])
                chords2.append(temp[1]) 
             
            formalized_input_l1 = []
            formalized_input_l2 = []
            for i in range(len(midi_input_l1)):
                formalized_input_l1.append(formalize_melody(midi_input_l1[i], chords1[i], sample_midi, sample_chord))
            
            for j in range(len(midi_input_l2)):
                formalized_input_l2.append(formalize_melody(sample_midi, sample_chord, midi_input_l2[j], chords2[j]))
                
            self.meters.set("Generating Piece")
            from_piece_to_result(formalized_input_l1, chords1, formalized_input_l2, chords2, "Result.mid", k)
            
            self.meters.set("Midi-Level Composing")
            midi_composition("Result.mid", mode)
            
            self.meters.set("Synthesis")
            if chord_decomp == "Yes":
                melody, chord = midi_to_wav("Result.mid", chord_separation=True, sr = 44100)
            else:
                melody, chord = midi_to_wav("Result.mid", chord_separation=False, sr = 44100)
                    
            data = melody + chord
            sf.write("Result.wav", data, 44100)
            self.meters.set("New File Generated Successfully!!")
            
        except ValueError:
            pass

root = Tk()
MusicStyleSynthesizer(root)
root.mainloop()

INFO:tensorflow:Apply unet for vocals_spectrogram
Instructions for updating:
Colocations handled automatically by placer.
INFO:tensorflow:Apply unet for accompaniment_spectrogram
INFO:tensorflow:Restoring parameters from pretrained_models/2stems/model


Exception in Tkinter callback
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.8/tkinter/__init__.py", line 1883, in __call__
    return self.func(*args)
  File "<ipython-input-27-00ce765c4adf>", line 79, in generate
    temp = melody_and_chords_extraction(fn)
  File "<ipython-input-6-dcd6645e53ed>", line 2, in melody_and_chords_extraction
    spleeter_separate(fn)
  File "<ipython-input-2-2062254cce4f>", line 6, in spleeter_separate
    os.mkdir(f'{file_name[:-4]}_processed_audio')
FileExistsError: [Errno 17] File exists: 'demo1_processed_audio'
