In [None]:
from music21 import *
import tensorflow as tf
from tensorflow.keras import *

from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
import tensorflow.keras.backend as K

from collections import Counter
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import os

# Load the TensorBoard notebook extension.
%load_ext tensorboard

### Changes in v2 (VGM)
Here, we decided to change the input vectors in an attempt to fix the generative process. The generative process was finicky because of the large input vector size: when a new note is generated, the input vector barely changes, which causes successive predictions to be similar. So we reduced the input vector size from 32 to 16.

The difference between the normal v2 and this version is that notes with a small number of occurrences are not removed here.

In [None]:
# Path to the MIDI files, relative to this notebook. Built with os.path.join so
# the separators are correct on every platform (a hard-coded "\\" only resolves
# on Windows).
midi_path = os.path.join(os.pardir, os.pardir, "data", "videogame_midi_mini")

# os.listdir returns entries in arbitrary order; sort them so every run
# processes the files in the same order.
midi_dir = sorted(os.listdir(midi_path))
len(midi_dir)

In [None]:
def read_midi(file):
    """Extract the note/chord sequence of the piano parts of a MIDI file.

    Parameters
    ----------
    file : str
        Path of the MIDI file to parse.

    Returns
    -------
    numpy.ndarray
        1-D array with one string per musical event, in the order the events
        are encountered. A single note is stored as ``str(element.pitch)``; a
        chord is stored as its normal-order integers joined with ``'.'``.
        The array is empty when the file has no part whose string
        representation contains ``'Piano'``.
    """
    print("Loading Music File:",file)
    notes = []

    # parsing a midi file
    midi = converter.parse(file)

    # grouping based on different instruments
    s2 = instrument.partitionByInstrument(midi)

    # partitionByInstrument can hand back None when it cannot partition the
    # stream (e.g. no instrument information); there is then no piano part to
    # read, so return an empty sequence instead of failing on `s2.parts`.
    if s2 is None:
        return np.array(notes)

    # Looping over all the instruments
    for part in s2.parts:
        # select elements of only piano
        if 'Piano' not in str(part):
            continue

        # find whether a particular element is note or a chord
        for element in part.recurse():
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    return np.array(notes)

In [None]:
# create array of midi file notes
# Every file yields a sequence of a different length, so the outer container
# has to be an object array; without dtype=object recent NumPy versions refuse
# to build an array from ragged rows.
midi_array = np.array(
    [read_midi(os.path.join(midi_path, f)) for f in midi_dir],
    dtype=object,
)

In [None]:
# Flatten the per-file sequences into a single list holding every note/chord
notes_ = []
for song_notes in midi_array:
    notes_.extend(song_notes)

# Size of the vocabulary (number of distinct notes/chords)
unique_notes = list(set(notes_))
print(len(unique_notes))

In [None]:
# Same bookkeeping as v2, but with a threshold of 1 occurrence nothing is
# actually filtered out.
note_counter = Counter(notes_)
freq = dict(note_counter)

frequent_notes = [
    pitch
    for pitch, times_seen in freq.items()
    if times_seen >= 1
]
print(len(frequent_notes))

In [None]:
# Keep only the notes that passed the frequency filter, song by song.
# A set gives constant-time membership tests; scanning the frequent_notes list
# for every single note made this step needlessly quadratic.
frequent_note_set = set(frequent_notes)

new_music = [
    [note_ for note_ in notes if note_ in frequent_note_set]
    for notes in midi_array
]

# The filtered songs still have different lengths, so store them in an object
# array (a plain np.array() call rejects ragged rows on recent NumPy versions).
midi_array = np.array(new_music, dtype=object)

In [None]:
# Data prep: slide a fixed-size window over every song.
# The window is 16 timesteps long so that, during generation, appending one new
# note changes a larger share of the input than it did with longer windows.
no_of_timesteps = 16
x, y = [], []

for song in midi_array:
    n_windows = len(song) - no_of_timesteps
    for start in range(n_windows):
        stop = start + no_of_timesteps

        # input: `no_of_timesteps` consecutive notes; target: the note right after them
        x.append(song[start:stop])
        y.append(song[stop])

x = np.array(x)
y = np.array(y)

In [None]:
# assign unique int to each note
# Iteration order of a set of strings is not stable between interpreter runs,
# so list(set(...)) would hand out different ids in every kernel session and a
# reloaded checkpoint would no longer match the mapping. Sorting pins the ids.
unique_x = sorted(set(x.ravel()))
x_note_to_int = {note_: number for number, note_ in enumerate(unique_x)}

In [None]:
# Encode every input window: replace each note by its integer id
x_seq = np.array([
    [x_note_to_int[token] for token in window]
    for window in x
])

In [None]:
# Target vocabulary and integer-encoded targets.
# sorted() instead of list(set(...)): set order of strings changes between
# interpreter runs, which would silently change which output unit stands for
# which note from one kernel session to the next.
unique_y = sorted(set(y))
y_note_to_int = {note_: number for number, note_ in enumerate(unique_y)}
y_seq = np.array([y_note_to_int[i] for i in y])

In [None]:
# Hold out 20% of the windows for validation; random_state is fixed so the
# split is the same on every run.
split = train_test_split(
    x_seq,
    y_seq,
    test_size=0.2,
    random_state=0,
)
x_tr, x_val, y_tr, y_val = split
x_tr.shape

In [None]:
# Checkpoint callback: only the weights with the lowest validation loss seen so
# far are written to disk.
checkpoint_options = dict(
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
mc = callbacks.ModelCheckpoint('vgg_mini_midis.hdf5', **checkpoint_options)

In [None]:
K.clear_session()
model = Sequential()

# Embedding layer: one 100-d trainable vector per input note id.
# input_length follows no_of_timesteps instead of repeating the literal 16, so
# the network keeps matching the windows if the window size is changed.
# Note: the three MaxPool1D(2) stages below each halve the sequence, so
# no_of_timesteps has to be at least 8.
model.add(Embedding(len(unique_x), 100, input_length = no_of_timesteps, trainable = True))

# Three causal convolution stages with growing filter count and dilation,
# each followed by dropout and a pooling step that halves the sequence.
model.add(Conv1D(32, 3, padding = 'causal', activation = 'relu'))
model.add(Dropout(0.2))
model.add(MaxPool1D(2))

model.add(Conv1D(64, 3, activation = 'relu', dilation_rate = 2, padding = 'causal'))
model.add(Dropout(0.2))
model.add(MaxPool1D(2))

model.add(Conv1D(128, 3, activation = 'relu', dilation_rate = 4, padding = 'causal'))
model.add(Dropout(0.2))
model.add(MaxPool1D(2))

# Collapse the remaining time axis to one feature vector per window
model.add(GlobalMaxPool1D())

# Classifier head: one softmax unit per note of the target vocabulary
model.add(Dense(128, activation = 'relu'))
model.add(Dense(len(unique_y), activation = 'softmax'))

# Targets are integer ids (not one-hot), hence the sparse loss
model.compile(loss = 'sparse_categorical_crossentropy', optimizer = 'adam')

model.summary()

In [None]:
# The notebook opens TensorBoard on logs/scalars, but nothing was ever written
# there: register a TensorBoard callback so the training curves are logged.
tensorboard_cb = callbacks.TensorBoard(log_dir="logs/scalars")

# x_tr / y_tr / x_val / y_val are already NumPy arrays (output of
# train_test_split on arrays), so no extra np.array() wrapping is needed.
history = model.fit(
    x_tr,
    y_tr,
    batch_size=128,
    epochs=500,
    validation_data=(x_val, y_val),
    verbose=1,
    # mc keeps the best weights on disk; tensorboard_cb records the metrics
    callbacks=[mc, tensorboard_cb]
)

In [None]:
# Open the TensorBoard UI inside the notebook, reading event files from logs/scalars.
%tensorboard --logdir logs/scalars

In [None]:
# Loading the best model: this must be the file the ModelCheckpoint callback of
# this notebook writes ('vgg_mini_midis.hdf5'), not a checkpoint from another
# experiment whose vocabulary would not match.
model = models.load_model('vgg_mini_midis.hdf5')

In [None]:
import random
# create list of notes (indices into the target vocabulary `unique_y`)
predictions=[]

# number of notes to generate
n_generated_notes = 16

# The softmax output is indexed by the y vocabulary while the network input is
# indexed by the x vocabulary, so a predicted id has to be translated through
# its note string before it can be fed back in.
y_int_to_note = dict(enumerate(unique_y))

# Pick ONE random seed window from the validation set, before the loop:
# re-sampling inside the loop threw away the updated window on every step, so
# the notes were never generated from each other.
# randint's upper bound is exclusive, so len(x_val) makes every row eligible.
ind = np.random.randint(0, len(x_val))
random_music = np.array(x_val[ind]).reshape(1, no_of_timesteps)

for i in range(n_generated_notes):
    # predict the next note and keep the most probable one
    prob = model.predict(random_music)[0]
    y_pred = int(np.argmax(prob, axis=0))
    predictions.append(y_pred)

    # translate the predicted y-id into the matching x-id
    next_input = x_note_to_int.get(y_int_to_note[y_pred])
    if next_input is None:
        # The note only ever occurred as a target, never inside an input
        # window, so it has no input id; repeat the latest input note instead.
        next_input = random_music[0, -1]

    # slide the window: drop the oldest note, append the new one
    random_music = np.append(random_music[0, 1:], next_input).reshape(1, no_of_timesteps)

print(predictions)

In [None]:
# Reverse lookup for the input vocabulary (kept for inspection of input windows)
x_int_to_note = dict((number, note_) for number, note_ in enumerate(unique_x))

# `predictions` holds argmax indices of the softmax layer, which has one unit
# per entry of unique_y, so they must be decoded with the y vocabulary, not
# the x vocabulary.
y_int_to_note = dict(enumerate(unique_y))
predicted_notes = [y_int_to_note[i] for i in predictions]
print(predicted_notes)

In [None]:
def convert_to_midi(prediction_output, name):
    """Turn a sequence of note/chord strings into a MIDI file.

    Parameters
    ----------
    prediction_output : iterable of str
        One entry per event. An entry that contains a '.' or consists only of
        digits is read as a chord (dot-separated integers); anything else is
        passed to ``note.Note`` as a single note.
    name : str
        Output file name without extension; the result is written to
        ``'<name>.mid'``.
    """
    output_notes = []

    # The position in the sequence doubles as the offset, so consecutive
    # events are placed one unit apart instead of stacking on top of each other.
    for offset, pattern in enumerate(prediction_output):
        is_chord = ('.' in pattern) or pattern.isdigit()

        if is_chord:
            # build one piano note per integer of the chord
            chord_members = []
            for pitch_number in pattern.split('.'):
                member = note.Note(int(pitch_number))
                member.storedInstrument = instrument.Piano()
                chord_members.append(member)

            event = chord.Chord(chord_members)
            event.offset = offset
        else:
            # plain note
            event = note.Note(pattern)
            event.offset = offset
            event.storedInstrument = instrument.Piano()

        output_notes.append(event)

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='{}.mid'.format(name))

In [None]:
# Write the generated sequence to "vg_mini_1.mid" (convert_to_midi appends the ".mid" extension).
convert_to_midi(predicted_notes, "vg_mini_1")