In [42]:
import glob
import pickle
import numpy as np
from music21 import converter, instrument, note, chord
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from io import BytesIO
from IPython.display import display, HTML
import pandas as pd
from sklearn import preprocessing

Train a LSTM to generate music 
music 21 is a module used for dealing with music data
Step1: we use a special music sheet data format which includes instruments, notes and chords called midi
We load the data from a file with a famous japanese composer joe hisaishi's music 
Using the converter.parse function of music 21, we will get a list of all notes and chords in the file.
We append the pitch of the every note object using its string notation
And we append every chord by encoding the id of chords' notes together into one string with each note seperated by a slash.
notes is the processed data. 

In [67]:
notes=[]
offsets=[]
duration=[]
composite=[]
for file in glob.glob("/Users/xiechuxi/Desktop/music/japan/*.mid"):
    midi = converter.parse(file)
    notes_to_parse = None
    parts = instrument.partitionByInstrument(midi)
    if parts: # file has instrument parts
        notes_to_parse = parts.parts[0].recurse()
    else: # file has notes in flat 
        notes_to_parse = midi.flat.notes
    for element in notes_to_parse:
        if isinstance(element, note.Note):
            composite.append([str(element.pitch),element.duration,element.offset])
        elif isinstance(element, chord.Chord):
            composite.append(['|'.join(str(n) for n in element.normalOrder),element.duration,element.offset])
            
    for element in notes_to_parse:
        if isinstance(element, note.Note):
            duration.append(element.duration)
        elif isinstance(element, chord.Chord):
            duration.append(element.duration)
            
    for element in notes_to_parse:
        if isinstance(element, note.Note):
            offsets.append(element.offset)
        elif isinstance(element, chord.Chord):
            offsets.append(element.offset)
            
    
    for element in notes_to_parse:
        if isinstance(element, note.Note):
            notes.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            notes.append('|'.join(str(n) for n in element.normalOrder))



Now we get a stream of notes and chords indicated by their notes as characters and chords as number string seperated by '|' (i.e. 1|6|8).
For the following step we will transfer the notes stream of charaters and chords into indices. 

In [70]:
# get all pitch names
pitchnames = sorted(set(item for item in notes))

#map the pitch names into integers
noteindices = dict((note, number) for number, note in enumerate(pitchnames))

#reprocess the whole list into integer indices and seperate the list into input list and outout list we want to predict the next note after each 15 notes 
#but the sequence length could be changed to find the best tune for predict a melody
inputnet = []
outputnet = []
h=[]
sequence_length = 15


for i in range(0, len(notes) - sequence_length, 1):
    sequencenotein = notes[i:i + sequence_length]
    sequenceout = notes[i + sequence_length]
    inputnet.append([noteindices[char] for char in sequencein])
    outputnet.append(noteindices[sequenceout])


#reshape the input and output net into a format compatible with the LSTM net
inputnet = np.reshape(inputnet, (len(inputnet), sequence_length, 1))
#normalize the numerical value into categorical value
inputnet = inputnet
outputnet=np.array(outputnet)
outputnet = np_utils.to_categorical(outputnet)


{'0': 0,
 '0|2': 1,
 '0|2|3': 2,
 '0|2|3|7': 3,
 '0|2|4|7': 4,
 '0|2|5': 5,
 '0|2|5|7': 6,
 '0|2|6': 7,
 '0|2|7': 8,
 '0|3': 9,
 '0|3|5': 10,
 '0|3|7': 11,
 '0|4': 12,
 '0|5': 13,
 '0|6': 14,
 '1': 15,
 '10': 16,
 '10|0': 17,
 '10|0|2|3': 18,
 '10|0|5': 19,
 '10|1': 20,
 '10|1|5': 21,
 '10|2': 22,
 '10|2|5': 23,
 '10|3': 24,
 '11': 25,
 '11|0|4|6': 26,
 '11|2': 27,
 '11|2|4': 28,
 '11|2|5': 29,
 '11|2|6': 30,
 '11|3': 31,
 '11|3|6': 32,
 '11|4': 33,
 '11|4|5': 34,
 '1|4': 35,
 '1|4|6': 36,
 '1|4|7|9': 37,
 '1|4|8': 38,
 '1|5': 39,
 '1|5|7': 40,
 '1|5|8': 41,
 '1|6': 42,
 '2': 43,
 '2|3': 44,
 '2|3|7|10': 45,
 '2|4': 46,
 '2|4|7': 47,
 '2|4|7|9': 48,
 '2|4|9': 49,
 '2|5': 50,
 '2|5|9': 51,
 '2|6': 52,
 '2|6|9': 53,
 '2|7': 54,
 '2|8': 55,
 '3': 56,
 '3|5': 57,
 '3|5|10': 58,
 '3|5|7|10': 59,
 '3|6': 60,
 '3|7': 61,
 '3|7|10': 62,
 '3|7|11': 63,
 '3|8': 64,
 '3|9': 65,
 '4': 66,
 '4|10': 67,
 '4|5': 68,
 '4|5|9|11': 69,
 '4|6': 70,
 '4|6|11': 71,
 '4|6|9': 72,
 '4|7': 73,
 '4|7|11': 74,


After the processing of the notes, and seperate the notes into inputnet and outputnet, we can make the prediction. 

In [30]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.datasets import imdb
from keras.callbacks import EarlyStopping
import numpy as np
from keras.layers import Flatten

# split the input and output nets into test and train set
testinputnet=inputnet[round(4/5*len(inputnet)):len(inputnet)]
testoutputnet=outputnet[round(4/5*len(inputnet)):len(inputnet)]
traininputnet=inputnet[0:round(4/5*len(inputnet))]
trainoutputnet=outputnet[0:round(4/5*len(inputnet))]

print('Build model...')
model = Sequential()
model.add(LSTM(300, dropout=0.0, recurrent_dropout=0.0,input_shape=(traininputnet.shape[1], traininputnet.shape[2])))
model.add(Dense(150, activation='relu'))
model.add(Dense(len(set(notes))))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')
monitor = EarlyStopping(monitor='loss', min_delta=1e-3, patience=10, verbose=1, mode='auto')
print('Train...')

model.fit(traininputnet,trainoutputnet,validation_data=(testinputnet,testoutputnet),callbacks=[monitor],verbose=2,epochs=400)

Build model...
Train...
Train on 4070 samples, validate on 1018 samples
Epoch 1/400
 - 4s - loss: 4.4231 - val_loss: 3.8735
Epoch 2/400
 - 3s - loss: 4.1647 - val_loss: 3.8982
Epoch 3/400
 - 3s - loss: 4.0597 - val_loss: 3.8867
Epoch 4/400
 - 3s - loss: 3.9690 - val_loss: 3.9361
Epoch 5/400
 - 3s - loss: 3.8807 - val_loss: 3.8946
Epoch 6/400
 - 3s - loss: 3.8049 - val_loss: 3.9794
Epoch 7/400
 - 3s - loss: 3.7176 - val_loss: 3.9765
Epoch 8/400
 - 3s - loss: 3.6409 - val_loss: 4.0212
Epoch 9/400
 - 3s - loss: 3.5514 - val_loss: 3.9953
Epoch 10/400
 - 3s - loss: 3.4726 - val_loss: 4.0634
Epoch 11/400
 - 3s - loss: 3.3849 - val_loss: 4.0423
Epoch 12/400
 - 3s - loss: 3.2976 - val_loss: 4.1377
Epoch 13/400
 - 3s - loss: 3.2100 - val_loss: 4.1551
Epoch 14/400
 - 3s - loss: 3.1209 - val_loss: 4.1529
Epoch 15/400
 - 3s - loss: 3.0427 - val_loss: 4.2771
Epoch 16/400
 - 3s - loss: 2.9489 - val_loss: 4.3072
Epoch 17/400
 - 3s - loss: 2.8729 - val_loss: 4.3270
Epoch 18/400
 - 3s - loss: 2.8134 - 

Epoch 154/400
 - 3s - loss: 0.3020 - val_loss: 8.1100
Epoch 155/400
 - 3s - loss: 0.3018 - val_loss: 8.1755
Epoch 156/400
 - 3s - loss: 0.2697 - val_loss: 8.3071
Epoch 157/400
 - 3s - loss: 0.2726 - val_loss: 8.2688
Epoch 158/400
 - 3s - loss: 0.2798 - val_loss: 8.2177
Epoch 159/400
 - 3s - loss: 0.2655 - val_loss: 8.1261
Epoch 160/400
 - 3s - loss: 0.3046 - val_loss: 8.1881
Epoch 161/400
 - 3s - loss: 0.3015 - val_loss: 8.3475
Epoch 162/400
 - 3s - loss: 0.2700 - val_loss: 8.6037
Epoch 163/400
 - 3s - loss: 0.2733 - val_loss: 8.4542
Epoch 164/400
 - 3s - loss: 0.2696 - val_loss: 8.4450
Epoch 165/400
 - 3s - loss: 0.3164 - val_loss: 8.4369
Epoch 166/400
 - 3s - loss: 0.3243 - val_loss: 8.5492
Epoch 167/400
 - 3s - loss: 0.3402 - val_loss: 8.4308
Epoch 168/400
 - 3s - loss: 0.3061 - val_loss: 8.6002
Epoch 169/400
 - 4s - loss: 0.2677 - val_loss: 8.5437
Epoch 00169: early stopping


<keras.callbacks.History at 0x1a295934a8>

After the training of the network is done we can start to make a prediction to the music now!(generate music).

In [32]:
from numpy import vstack

seed = np.random.randint(190, size=(10, 1))
#first generate a list of random notes if you want to be more creative with melody 
#you could write a small piece of notes by yourself or copy a piece online.

int_to_note = dict((number, note) for number, note in enumerate(pitchnames))
pattern = seed#pattern is the iterator which each iteration will drop the first note and predict the next note according to the pattern
predictoutput=[]
#generate 500 notes as seed so that following melody could be created based on this seed
for i in range(400):
    predictinput = np.reshape(pattern, (1, len(pattern), 1))
    predictinput = predictinput 
    prediction = model.predict(predictinput, verbose=0)
    index = np.argmax(prediction)
    result = int_to_note[index]
    index=np.array([[index]])
    predictoutput.append(result)
    pattern=vstack((pattern, index))
    pattern = pattern[1:len(pattern)]
    


In [33]:
offset = 0
output_notes = []

for pattern in predictoutput:
    # pattern is a chord
    if ('|' in pattern) or pattern.isdigit():
        notes_in_chord = pattern.split('|')
        notes = []
        for current_note in notes_in_chord:
            new_note = note.Note(int(current_note))
            new_note.storedInstrument = instrument.Piano()
            notes.append(new_note)
        new_chord = chord.Chord(notes)
        new_chord.offset = offset
        output_notes.append(new_chord)
    # pattern is a note
    else:
        new_note = note.Note(pattern)
        new_note.offset = offset
        new_note.storedInstrument = instrument.Piano()
        #we can actually change the instrument in the further continuation of the project
        output_notes.append(new_note)
    
    offset+=0.5
    #to make sure notes don't stack

In [34]:
from music21 import *
midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp='/Users/xiechuxi/Desktop/music/musiccreate/gn13.mid')

'/Users/xiechuxi/Desktop/music/musiccreate/gn13.mid'