CSE 190 Final Project

In [52]:
import mido
import numpy as np
from mido import MidiFile, Message, MidiTrack
import glob
import pickle
import random


import tensorflow as tf

from keras.models import Sequential

from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Lambda
from keras.layers import Dropout
from keras.layers import Softmax
from keras.layers import Activation
from keras.layers import Bidirectional
from keras.layers import BatchNormalization

from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

In [2]:
def create_network(network_input, n_vocab):
    """Build and compile the note-prediction model.

    The model is three stacked bidirectional LSTM layers followed by a small
    dense head that ends in a softmax over ``n_vocab`` classes.

    Args:
        network_input: array of input sequences; only ``shape[1]`` and
            ``shape[2]`` are read, to declare the input shape of the first
            layer.
        n_vocab: width of the final Dense layer (number of output classes).

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss).
    """
    units = 256
    drop_rate = 0.4
    steps_per_sample = network_input.shape[1]
    features_per_step = network_input.shape[2]

    model = Sequential()

    # Recurrent stack: the first two layers emit the full sequence so the next
    # LSTM can consume it; the last one emits only its final state.
    model.add(
        Bidirectional(
            LSTM(units, dropout=drop_rate, return_sequences=True),
            input_shape=(steps_per_sample, features_per_step)
        )
    )
    model.add(Bidirectional(LSTM(units, dropout=drop_rate, return_sequences=True)))
    model.add(Bidirectional(LSTM(units, dropout=drop_rate)))

    # Dense head. The Lambda divides the logits by 0.6 before the softmax,
    # which sharpens the resulting distribution.
    head_layers = [
        BatchNormalization(),
        Dropout(drop_rate),
        Dense(units),
        Activation('relu'),
        BatchNormalization(),
        Dropout(drop_rate),
        Dense(n_vocab),
        Lambda(lambda x: x / 0.6),
        Softmax(),
    ]
    for layer in head_layers:
        model.add(layer)

    model.compile(optimizer='adam', loss='categorical_crossentropy')

    return model


In [3]:
# MIDI channels tracked by parse(), in the fixed order it cycles through them.
channels = [0, 1, 2, 9]
# LUT: MIDI channel number -> row index of that channel in parse()'s held-note
# table. Channels that are not tracked map to 0.
chanToArr = [
    channels.index(chan) if chan in channels else 0
    for chan in range(channels[-1] + 1)
]

def parse(input_midi):
    """Encode the note-on events of a MIDI file as a list of string tokens.

    All tracks are merged and scanned in order. The encoder keeps a cursor
    that cycles through the tracked ``channels`` in their listed order:

    * a ``note_on`` with non-zero velocity appends ``[note, channel, 1]``;
    * every time the cursor wraps past the last tracked channel a
      ``[0, 0, 0]`` marker is appended;
    * every time the cursor passes over a channel while catching up to the
      channel of the current message, each note still held on the passed
      channel has its third field incremented.

    Each entry is finally rendered as ``"note.channel.count"``.

    Only ``note_on`` messages are read (velocity 0 releases a note).
    NOTE(review): explicit ``note_off`` messages are ignored, so a note ended
    that way stays "held" — confirm the corpus only uses velocity-0 note_on.

    Args:
        input_midi: path (or anything ``MidiFile`` accepts) of the file to read.

    Returns:
        list of ``"note.channel.count"`` strings, in event order.
    """
    next_id = 1          # 1-based position the next appended entry will get
    events = []          # [note, channel, count] entries, in order

    prev_channel = 0     # channel of the previous note_on, to detect changes
    slot = 0             # cursor into `channels` (forced order 0->1->2->9)

    # One independent row of 128 note slots per tracked channel. A slot holds
    # the 1-based id of the entry for a currently held note, or 0 if the note
    # is not sounding. The rows must be distinct lists: `[[0]*128]*4` would
    # alias a single row four times and mix up the channels.
    held = [[0] * 128 for _ in channels]

    mid = MidiFile(input_midi)
    for msg in mido.merge_tracks(mid.tracks):
        if msg.type != 'note_on':
            continue
        # The cursor can only ever land on a tracked channel; without this
        # guard a message on any other channel would loop forever below.
        if msg.channel not in channels:
            continue

        if msg.channel != prev_channel:
            # Advance the cursor until it reaches this message's channel.
            while True:
                slot += 1
                if slot == len(channels):
                    slot = 0
                    events.append([0, 0, 0])
                    next_id += 1
                if channels[slot] == msg.channel:
                    break
                # Channel passed over: extend every note still held on it.
                for event_id in held[chanToArr[channels[slot]]]:
                    if event_id > 0:
                        events[event_id - 1][2] += 1

        row = held[chanToArr[msg.channel]]
        if msg.velocity == 0:
            # note_on with velocity 0 acts as note-off
            row[msg.note] = 0
        else:
            row[msg.note] = next_id
            events.append([msg.note, msg.channel, 1])
            next_id += 1

        prev_channel = msg.channel

    return [".".join(str(field) for field in event) for event in events]

In [4]:
def get_notes():
    """Parse every sample MIDI file and cache the combined token list.

    Files matching ``multi-lstm-music/samples/small/*.mid`` are parsed with
    ``parse`` in sorted path order (so the corpus order is reproducible) and
    their tokens are concatenated. The result is pickled to ``notes.p``.

    Returns:
        list of note tokens from all files, in order.

    Raises:
        FileNotFoundError: if no MIDI file matches the sample pattern.
    """
    pattern = "multi-lstm-music/samples/small/*.mid"
    midi_paths = sorted(glob.glob(pattern))
    if not midi_paths:
        raise FileNotFoundError("No MIDI files found matching %s" % pattern)

    notes = []
    for file in midi_paths:
        print("Parsing %s" % file)
        notes.extend(parse(file))

    # `with` guarantees the cache file is flushed and closed.
    with open('notes.p', 'wb') as notes_file:
        pickle.dump(notes, notes_file)
    return notes

def prepare_sequences(notes, n_vocab, sequence_length=4):
    """Prepare the training sequences used by the neural network.

    Every token is mapped to its index in the sorted set of distinct tokens.
    Each run of ``sequence_length`` consecutive tokens becomes one input
    sample and the token that follows it becomes the target.

    Args:
        notes: sequence of note tokens.
        n_vocab: number of distinct tokens; used to scale the inputs and as
            the width of the one-hot targets.
        sequence_length: number of tokens per input sample (default 4).

    Returns:
        ``(network_input, network_output)`` where ``network_input`` has shape
        ``(n_patterns, sequence_length, 1)`` with values divided by
        ``n_vocab``, and ``network_output`` is the one-hot encoding of the
        targets with ``n_vocab`` columns.
    """
    pitchnames = sorted(set(notes))
    note_to_int = {note: number for number, note in enumerate(pitchnames)}

    encoded = [note_to_int[note] for note in notes]

    n_patterns = max(len(encoded) - sequence_length, 0)
    network_input = [encoded[i:i + sequence_length] for i in range(n_patterns)]
    network_output = encoded[sequence_length:]

    # reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

    # normalize input
    network_input = network_input / float(n_vocab)

    # Fix the one-hot width explicitly: if the highest-indexed token never
    # occurs as a target, inferring the width from the data would give fewer
    # columns than the model's n_vocab-wide output layer.
    network_output = to_categorical(network_output, num_classes=n_vocab)

    return (network_input, network_output)

In [56]:
def train_network():
    """Train the note-prediction network end to end.

    Parses the sample corpus, builds the training sequences, creates the
    model and fits it for 75 epochs with batch size 32. A checkpoint file is
    written whenever the training loss reaches a new minimum.
    """
    notes = get_notes()
    vocab_size = len(set(notes))

    inputs, targets = prepare_sequences(notes, vocab_size)
    model = create_network(inputs, vocab_size)

    # Only keep checkpoints that improve on the best training loss so far;
    # epoch number and loss are embedded in the file name.
    best_loss_checkpoint = ModelCheckpoint(
        "weights2-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5",
        monitor='loss',
        verbose=0,
        save_best_only=True,
        mode='min'
    )

    model.fit(
        inputs,
        targets,
        batch_size=32,
        epochs=75,
        verbose=1,
        callbacks=[best_loss_checkpoint]
    )

# Run the full training pipeline (parse corpus -> build sequences -> fit).
# Writes notes.p and the weights2-improvement-*.hdf5 checkpoints as side effects.
train_network()

Parsing multi-lstm-music/samples/small\084_DragonBuster_00_01TitleScreen.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_01_02RoundStart.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_02_03RengaBGM.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_03_04CatacombBGM.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_04_05LimestoneCaveBGM.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_05_06RoomGuardCombatBGM.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_06_07IwayamaDragonBGM.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_07_08PrincessAppears.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_08_09PrincessLoveSceneMusic.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_09_10GameOver.mid
Parsing multi-lstm-music/samples/small\084_DragonBuster_10_11Unknown.mid
Parsing multi-lstm-music/samples/small\086_DragonFighter_00_01Title.mid
Parsing multi-lstm-music/samples/small\086_DragonFighter_01_

In [87]:
def prepare_sequences_prediction(notes, pitchnames, n_vocab, sequence_length=4):
    """Build the seed sequences used for generation.

    Each run of ``sequence_length`` consecutive tokens (excluding the run that
    ends on the very last token) is mapped to integer indices using the
    position of each token in ``pitchnames``.

    Args:
        notes: sequence of note tokens.
        pitchnames: ordered distinct tokens; a token's position is its index.
        n_vocab: divisor used to scale the indices for the network.
        sequence_length: number of tokens per sequence (default 4).

    Returns:
        ``(network_input, normalized_input)``: ``network_input`` is a list of
        integer-index lists; ``normalized_input`` is the same data as an array
        of shape ``(n_patterns, sequence_length, 1)`` divided by ``n_vocab``.
    """
    note_to_int = {note: number for number, note in enumerate(pitchnames)}

    # Kept as plain lists: generate_notes() slides a window over one of them.
    network_input = [
        [note_to_int[note] for note in notes[i:i + sequence_length]]
        for i in range(len(notes) - sequence_length)
    ]
    n_patterns = len(network_input)

    # reshape the input into a format compatible with LSTM layers
    normalized_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # normalize input
    normalized_input = normalized_input / float(n_vocab)

    return (network_input, normalized_input)

def generate_notes(model, network_input, pitchnames, n_vocab, num_notes=1000):
    """Generate notes from the neural network based on a sequence of notes.

    A random sequence from ``network_input`` seeds a sliding window. At each
    step the window is scaled by ``n_vocab``, fed to ``model.predict``, and the
    next index is sampled at random in proportion to the predicted weights.
    The sampled index is appended to the window and the oldest entry dropped.

    Args:
        model: object whose ``predict`` takes an array of shape
            ``(1, window_length, 1)`` and returns per-class weights in row 0.
        network_input: list of integer-index sequences to pick the seed from;
            it is not modified.
        pitchnames: ordered distinct tokens; index -> token lookup.
        n_vocab: divisor used to scale the window before prediction.
        num_notes: how many tokens to generate (default 1000).

    Returns:
        list of ``num_notes`` generated tokens.

    Raises:
        ValueError: if ``network_input`` is empty.
    """
    if len(network_input) == 0:
        raise ValueError("network_input is empty; at least one seed sequence is required")

    # randint's upper bound is exclusive, so this can pick any seed sequence
    # (including the last one, and the only one when there is just one).
    start = np.random.randint(0, len(network_input))

    int_to_note = dict(enumerate(pitchnames))

    # Work on a copy so the caller's seed sequence is left untouched.
    pattern = list(network_input[start])
    prediction_output = []

    for _ in range(num_notes):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input)

        # Weighted sampling: draw a threshold in [0, total) and take the first
        # class whose cumulative weight exceeds it.
        cumulative = np.cumsum(prediction[0])
        threshold = random.random() * cumulative[-1]
        index = int(np.searchsorted(cumulative, threshold, side='right'))
        # Guard against float rounding pushing the index one past the end.
        index = min(index, len(cumulative) - 1)

        prediction_output.append(int_to_note[index])

        # slide the window forward by one step
        pattern = pattern[1:] + [index]

    return prediction_output

In [88]:
def generate(weights_path="weights2-improvement-73-3.0493-bigger.hdf5"):
    """Generate a piece from trained weights and write it as a MIDI file.

    Loads the cached tokens from ``notes.p``, rebuilds the network with the
    matching input shape and vocabulary size, loads the trained weights,
    samples a token sequence and hands it to ``create_midi``.

    Args:
        weights_path: checkpoint file to load. The default is the checkpoint
            name from one particular training run; pass the file produced by
            your own run.
    """
    # notes.p is the cache written by get_notes(). pickle.load can execute
    # arbitrary code, so only load a file you produced yourself.
    with open('notes.p', 'rb') as notes_file:
        notes = pickle.load(notes_file)

    pitchnames = sorted(set(notes))
    n_vocab = len(pitchnames)

    network_input, normalized_input = prepare_sequences_prediction(notes, pitchnames, n_vocab)
    model = create_network(normalized_input, n_vocab)

    model.load_weights(weights_path)

    prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
    create_midi(prediction_output)
    
def create_midi(prediction_output, output_path='output.mid'):
    """Write generated ``"note.channel.count"`` tokens to a MIDI file.

    The file gets one empty leading track followed by one track per channel
    0, 1, 2 and 9, each starting with a program change. Tokens whose count is
    0, or whose channel is not one of those four, are skipped. Every other
    token becomes a note that starts immediately after the previous event on
    its track and is held for ``count * 100`` ticks.

    Args:
        prediction_output: iterable of ``"note.channel.count"`` strings; it is
            not modified.
        output_path: where to save the file (default ``'output.mid'``).
    """
    # channel -> program number; insertion order is the track order in the file
    channel_programs = {0: 80, 1: 81, 2: 38, 9: 121}
    ticks_per_count = 100

    mid = MidiFile()
    mid.tracks.append(MidiTrack())  # leading track, left empty

    tracks = {}
    for channel, program in channel_programs.items():
        track = MidiTrack()
        track.append(Message('program_change', channel=channel, program=program, time=0))
        mid.tracks.append(track)
        tracks[channel] = track

    for token in prediction_output:
        fields = [int(x) for x in token.split('.')]
        note, channel, count = fields[0], fields[1], fields[2]

        if count == 0 or channel not in tracks:
            continue

        track = tracks[channel]
        # A message's `time` is the delta since the previous message on the
        # track, so the duration belongs on the releasing message. Putting it
        # on the note-on would delay the note and give it zero length.
        track.append(Message('note_on', channel=channel, note=note, time=0))
        track.append(Message('note_on', channel=channel, note=note, velocity=0,
                             time=count * ticks_per_count))

    mid.save(output_path)

# Sample a new piece from the trained model and save it as output.mid.
# Requires notes.p and the weights checkpoint from a previous training run.
generate()
