## Vishnu Menon
### MIDI Music Generator
### 5/19/2020

In [32]:
## Imports 1
import pandas as pd 
from sklearn import preprocessing
import numpy as np
from sklearn.model_selection import train_test_split
import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense
from keras.utils import plot_model
from keras.layers import Activation
from music21 import *
import pretty_midi
import math
import sys
import random
np.set_printoptions(threshold=sys.maxsize)

In [5]:
## File lists
paths = ['Fantasie_Impromptu.mid', 'Solo_Violin_Sonata_No._1_in_G_Minor_-_J._S._Bach_BWV_1001.mxl',
         'Moonlight_Sonata_3rd_Movement_-_Ludwig_van_Beethoven.mxl', 'Paganini_Caprice_No_5_in_A_minor.mxl',
        'Liszt_Romance_S._169.mxl']
## Includes vitali chaconne in addition to paths
paths2 = ['Solo_Violin_Sonata_No._1_in_G_Minor_-_J._S._Bach_BWV_1001.mxl',
         'Moonlight_Sonata_3rd_Movement_-_Ludwig_van_Beethoven.mxl', 'Paganini_Caprice_No_5_in_A_minor.mxl',
          'Vitali_Chaconne_Solo_Violin.mxl', 'Liszt_Romance_S._169.mxl','Fantasie_Impromptu.mxl']

In [6]:
## Get notes and rests per instrument from score
def notesAndRests(score):
    instruments = instrument.partitionByInstrument(score)
    noteMatrix = []
    i = instruments[0]
    for NoteRestChord in i.notesAndRests:
        noteMatrix.append(NoteRestChord)
    return noteMatrix

In [7]:
## Replace noteMatrix with matrix containing tuples of pitch-offset information
## Pitches are used from this point on to identify recreate chords based on offset information because some notes,
## though in the same chord, can have varying durations. Note offsets in chords can also have discrepancies based
## on the file's condition. Pairing pitches to the offset of the chord they originate from avoids this. 
def pitchesAndOffsetTuples(score):
    for i in range(len(score)):
        element = score[i]
        pitchInfo = [element]
        if(element.isChord):
            pitchInfo = list(element.pitches)
        elif(element.isNote):
            pitchInfo = [element.pitch]
        score[i] = (pitchInfo, element.offset)
    return score

In [8]:
## Group pitches occuring at same offset into pitch-duration tuples
## Reconstruct the duration of a set of pitches to be added to the regrouped chords and notes
def groupPitchesByOffset(tupleArray):
    pitchesAndDuration = []
    arrayLen = len(tupleArray)
    i = 0
    while(i < arrayLen):
        pitches,offset = tupleArray[i]
        while(i + 1 < arrayLen and tupleArray[i + 1][1] == offset):
            i += 1
            # Add all of the pitches in the tuple with the same offset as tuple i to this offset's group of pitches
            if(len(tupleArray[i][0]) > 1  or type(tupleArray[i][0][0]) != type(note.Rest())):
                pitches.extend(tupleArray[i][0])
        dur = duration.Duration(quarterLength=4.0)
        if(i < arrayLen - 1):
            dur.quarterLength = tupleArray[i + 1][1] - offset
        if(type(pitches[0]) == type(note.Rest()) and len(pitches) > 1):
            pitches.pop(0)
        pitchesAndDuration.append((pitches,dur))
        i += 1
    return pitchesAndDuration

In [9]:
## Reconstruct notes and chords from the pitches and durations, used to test if the data is still faithful
## to the original piece. 
def reconstructListOfNotesAndDurations(tuplesArray):
    ## [] can be replaced by stream.Stream to create a stream instead of a list
    s = []
    for each in tuplesArray:
        pitches, d = each
        if(len(pitches) == 1 and type(pitches[0]) == type(note.Rest())):
            element = pitches[0]
        else:
            pitchNames = list(map(lambda x: x.nameWithOctave, pitches))
            if(len(pitchNames) > 1):
                element = chord.Chord(pitchNames)
            else:
                element = note.Note(pitchNames[0])
        element.duration = d
        s.append(element)
    return s

In [10]:
## Convert note-dur list to midi only multi label encoding
def noteToMidiNumbers(nList):
    # 88 to represent 88 midi encodings and 1 for rest
    data = np.zeros((len(nList), 102))
    for i in range(len(nList)):
        if(nList[i].isRest):
            data[i,88] = 1
        else:
            pitches = nList[i].pitches
            for e in pitches:
                data[i,e.midi] = 1
                ## REMOVE THIS IF YOU WANT TO ENCODE ALL NOTES IN A CHORD NOT JUST THE FIRST
                break
    return data

In [11]:
## Convert note-dur to midi with duration encoding
def noteToMidiDur(nList):
    # 130 to represent 128 midi encodings, 1 for rest, and 1 for duration
    data = np.zeros((len(nList), 130))
    for i in range(len(nList)):
        if(nList[i].isRest):
            data[i,128] = 1
        else:
            pitches = nList[i].pitches
            for e in pitches:
                data[i,e.midi] = 1
        data[i,129] = nList[i].duration.quarterLength
    return data

In [12]:
def getData(score):
        intermediate = notesAndRests(score)
        intermediate = pitchesAndOffsetTuples(intermediate)
        intermediate = groupPitchesByOffset(intermediate) 
        intermediate = reconstructListOfNotesAndDurations(intermediate)
        intermediate = noteToMidiNumbers(intermediate)
        print('''Number of notes: {0}'''.format(intermediate.shape[0]))
        return intermediate

In [13]:
## Group Multi-Label Encodings into Sequences and Corresponding Labels

## Consider altering function so that sequences can be found at halfway points between labels recursively up to a 
## certain depth. E.g. Sequences at every 0th offset, Seqlen/2 offset, SeqLen/4 offset, and so on. 
def getSeqsAndLabelsForSingleScore(data, SeqLen):
    ## data is a 2d numpy array, SeqLen is an integer
    numSeqs = math.floor(data.shape[0]/(SeqLen + 1))
    ## Numpy array of Seqs
    bridgeAddition = math.floor(numSeqs - math.floor(SeqLen/2) / SeqLen)
    SeqSet = np.zeros((numSeqs + bridgeAddition, SeqLen, data.shape[1]))
    ## Numpy array of Labels
    SeqLabels = np.zeros((numSeqs + bridgeAddition, data.shape[1]))
    for i in range(numSeqs - 1):
        SeqSet[i] = data[i*SeqLen : (i+1)*SeqLen]
        SeqLabels[i] = data[(i+1)*SeqLen]
    offset = math.floor(SeqLen/2)
    for i in range(numSeqs, numSeqs + bridgeAddition - 1):
        multiple = i - numSeqs
        SeqSet[i] = data[offset + multiple*SeqLen : offset + (multiple + 1)*SeqLen]
        SeqLabels[i] = data[offset + (multiple + 1)*SeqLen]
    return (SeqSet, SeqLabels)

## Every increasing permutation instead of every half sequence len
def getSeqsAndLabelsPermutations(data, SeqLen):
    ## data is a 2d numpy array, SeqLen is an integer
    numSeqs = math.floor(data.shape[0] - SeqLen)
    ## Numpy array of Seqs
    SeqSet = np.zeros((numSeqs, SeqLen, data.shape[1]))
    ## Numpy array of Labels
    SeqLabels = np.zeros((numSeqs, data.shape[1]))
    for i in range(numSeqs):
        SeqSet[i] = data[i : i + SeqLen]
        SeqLabels[i] = data[i + SeqLen]
    return (SeqSet, SeqLabels)

In [14]:
## Outputs a ndarray of (Num Sequences, Sequence Length, Num features) schema
def getSeqsAndLabels(scores, SeqLen):
    SeqSet, SeqLabels = getSeqsAndLabelsPermutations(getData(scores.pop(0)), SeqLen)
    print(SeqSet.shape, SeqLabels.shape)
    for each in scores:
        D, L = getSeqsAndLabelsPermutations(getData(each), SeqLen)
        SeqSet = np.concatenate((SeqSet, D))
        SeqLabels = np.concatenate((SeqLabels, L))
        print(SeqSet.shape, SeqLabels.shape)
    return (SeqSet, SeqLabels)
# Load Files and Extract streams
scores = list(map(lambda x: converter.parse(x).parts.stream(), paths))
Seqs, Labels = getSeqsAndLabels(scores, 50)
print('''Overall shape of sequences data {0}'''.format(Seqs.shape))

Number of notes: 1609
(1559, 50, 102) (1559, 102)
Number of notes: 2745
(4254, 50, 102) (4254, 102)
Number of notes: 2872
(7076, 50, 102) (7076, 102)
Number of notes: 318
(7344, 50, 102) (7344, 102)
Number of notes: 576
(7870, 50, 102) (7870, 102)
Overall shape of sequences data (7870, 50, 102)


In [15]:
## Train Model
model = Sequential()
model.add(LSTM(
    256,
    input_shape=(Seqs.shape[1], Seqs.shape[2]),
    return_sequences=True
))
model.add(LSTM(256, return_sequences=True))
model.add(LSTM(256))
model.add(Dense(Seqs.shape[2], activation = 'softmax'))
model.compile(loss='s_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [16]:
## Train on everything except the first 10 samples
model.fit(Seqs[10:Seqs.shape[0]], Labels[10:Labels.shape[0]], epochs=80, batch_size=32)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80

KeyboardInterrupt: 

In [50]:
## Pass in the number of notes you would like the model to predict as an int

from numpy.random import choice

def getPredictions(n):
    inp = Seqs[698].tolist()
    predictions = []
    i = 0
    while(i < n):
        inpNP = np.asarray(inp)
        pred = model.predict(np.reshape(inpNP, (1,inpNP.shape[0],inpNP.shape[1])))
        ## Currently only chooses the maximum of the predicted array for storage
        inp.append(pred[0])
        draw = np.random.choice(np.arange(0,inpNP.shape[1]),p=pred[0], replace = True)
        predictions.append(draw)
        inp = inp[1:len(inp)]
        i += 1        
    return predictions
predNotes = getPredictions(200)
print(predNotes)

[75, 77, 73, 67, 68, 70, 77, 75, 75, 73, 80, 68, 70, 68, 69, 70, 72, 68, 96, 96, 94, 92, 88, 87, 85, 81, 82, 80, 76, 70, 80, 72, 69, 70, 75, 68, 70, 73, 72, 70, 68, 70, 70, 68, 73, 75, 77, 80, 78, 77, 75, 77, 75, 73, 75, 77, 73, 68, 70, 71, 71, 70, 75, 77, 78, 77, 75, 77, 73, 67, 68, 70, 77, 75, 75, 73, 80, 68, 70, 70, 69, 70, 72, 68, 96, 96, 27, 92, 88, 87, 85, 81, 82, 80, 76, 70, 80, 72, 69, 70, 75, 68, 70, 73, 72, 70, 68, 70, 70, 68, 73, 75, 77, 80, 78, 77, 75, 77, 75, 73, 75, 77, 73, 68, 70, 71, 71, 70, 75, 77, 78, 77, 75, 77, 73, 67, 68, 70, 77, 75, 75, 73, 80, 68, 70, 70, 69, 70, 72, 68, 84, 96, 94, 92, 88, 87, 85, 81, 82, 80, 76, 70, 80, 72, 69, 70, 75, 68, 70, 73, 72, 70, 68, 70, 70, 68, 73, 75, 77, 80, 78, 77, 75, 77, 75, 73, 75, 77, 73, 68, 70, 71, 71, 70, 75, 77, 78, 77, 75, 77]


In [48]:
def recreateStream(midis):
    s = stream.Stream()
    for m in midis:
        p = pitch.Pitch(m)
        n = note.Note()
        n.pitch = p
        n.duration = duration.Duration(quarterLength = 1)
        s.append(n)
    return s
s = recreateStream(predNotes)

In [49]:
## Write to midi file
slStream = s.augmentOrDiminish(0.50)
slStream.write('midi', fp='Output.mid')

'Output.mid'