# Jazz melody generation using LSTM RNNs

Using data from the Weimar Jazz Database and based on Jason Brownlee's LSTM text generation tutorial.

Currently this only takes in a single MIDI file containing the melody track; further notebooks will explore multiple MIDI files, harmony mappings, and who knows what else!

### Imports

In [165]:
import datetime
import re

import h5py
import keras
import mido
import numpy as np

### Load the data

In [211]:
# midi_file = mido.MidiFile("../data/midi/ArtPepper_Anthropology_FINAL.mid") # Unquantized
midi_file = mido.MidiFile("../data/midi_quantized/ArtPepper_Anthropology_FINAL.mid") # Quantized
midi_track = midi_file.tracks[0]
midi_notes = [msg for msg in midi_track if msg.type=="note_on" or msg.type=="note_off"]
len(midi_notes)
midi_notes[:100]

[<message note_on channel=0 note=65 velocity=104 time=0>,
 <message note_off channel=0 note=65 velocity=104 time=192>,
 <message note_on channel=0 note=63 velocity=109 time=0>,
 <message note_off channel=0 note=63 velocity=109 time=144>,
 <message note_on channel=0 note=58 velocity=103 time=0>,
 <message note_off channel=0 note=58 velocity=103 time=48>,
 <message note_on channel=0 note=61 velocity=104 time=0>,
 <message note_off channel=0 note=61 velocity=104 time=192>,
 <message note_on channel=0 note=63 velocity=114 time=0>,
 <message note_off channel=0 note=63 velocity=114 time=192>,
 <message note_on channel=0 note=58 velocity=106 time=0>,
 <message note_off channel=0 note=58 velocity=106 time=192>,
 <message note_on channel=0 note=58 velocity=98 time=0>,
 <message note_off channel=0 note=58 velocity=98 time=384>,
 <message note_on channel=0 note=50 velocity=90 time=1344>,
 <message note_off channel=0 note=50 velocity=90 time=192>,
 <message note_on channel=0 note=57 velocity=113 t

### Format the data

In [65]:
# Normalize note velocities
# TODO: Play with normalizing other parameters
for note in midi_notes:
    note.velocity = note.velocity - (note.velocity % 10)
set([note.velocity for note in midi_notes])

{70, 80, 90, 100, 110, 120}

In [179]:
# Create note set
note_events_keys = ("type", "pitch", "velocity", "duration")
note_events = [(note.type, note.note, note.velocity, note.time) for note in midi_notes]
note_set = sorted(list(set(note_events)))
num_note_events = len(note_events)
num_unique_notes = len(note_set)
print("{} unique notes in note set (vs. {} note events in MIDI file)".format(num_unique_notes, num_note_events))
note_set[:10]

805 unique notes in note set (vs. 1060 note events in MIDI file)


[('note_off', 50, 90, 192),
 ('note_off', 50, 102, 192),
 ('note_off', 51, 106, 112),
 ('note_off', 51, 107, 384),
 ('note_off', 51, 108, 96),
 ('note_off', 52, 110, 192),
 ('note_off', 53, 98, 96),
 ('note_off', 53, 101, 64),
 ('note_off', 53, 106, 96),
 ('note_off', 53, 109, 384)]

In [180]:
len([note for note in note_set if note[0] == "note_off"])

456

In [214]:
# Make map for note to integer
note_to_int = dict((n, i) for i, n in enumerate(note_set))
note_to_int

{('note_off', 50, 90, 192): 0,
 ('note_off', 50, 102, 192): 1,
 ('note_off', 51, 106, 112): 2,
 ('note_off', 51, 107, 384): 3,
 ('note_off', 51, 108, 96): 4,
 ('note_off', 52, 110, 192): 5,
 ('note_off', 53, 98, 96): 6,
 ('note_off', 53, 101, 64): 7,
 ('note_off', 53, 106, 96): 8,
 ('note_off', 53, 109, 384): 9,
 ('note_off', 53, 111, 48): 10,
 ('note_off', 54, 102, 160): 11,
 ('note_off', 54, 107, 96): 12,
 ('note_off', 55, 97, 64): 13,
 ('note_off', 55, 102, 64): 14,
 ('note_off', 55, 102, 256): 15,
 ('note_off', 55, 103, 144): 16,
 ('note_off', 55, 106, 64): 17,
 ('note_off', 55, 107, 96): 18,
 ('note_off', 55, 111, 48): 19,
 ('note_off', 55, 114, 96): 20,
 ('note_off', 56, 103, 48): 21,
 ('note_off', 56, 107, 96): 22,
 ('note_off', 56, 109, 96): 23,
 ('note_off', 56, 114, 96): 24,
 ('note_off', 57, 103, 96): 25,
 ('note_off', 57, 107, 64): 26,
 ('note_off', 57, 107, 96): 27,
 ('note_off', 57, 108, 96): 28,
 ('note_off', 57, 109, 96): 29,
 ('note_off', 57, 109, 160): 30,
 ('note_off

In [215]:
# Make map for integer back to note (we'll need this in the generation phase)
int_to_note = dict((i, n) for i, n in enumerate(note_set))
int_to_note

{0: ('note_off', 50, 90, 192),
 1: ('note_off', 50, 102, 192),
 2: ('note_off', 51, 106, 112),
 3: ('note_off', 51, 107, 384),
 4: ('note_off', 51, 108, 96),
 5: ('note_off', 52, 110, 192),
 6: ('note_off', 53, 98, 96),
 7: ('note_off', 53, 101, 64),
 8: ('note_off', 53, 106, 96),
 9: ('note_off', 53, 109, 384),
 10: ('note_off', 53, 111, 48),
 11: ('note_off', 54, 102, 160),
 12: ('note_off', 54, 107, 96),
 13: ('note_off', 55, 97, 64),
 14: ('note_off', 55, 102, 64),
 15: ('note_off', 55, 102, 256),
 16: ('note_off', 55, 103, 144),
 17: ('note_off', 55, 106, 64),
 18: ('note_off', 55, 107, 96),
 19: ('note_off', 55, 111, 48),
 20: ('note_off', 55, 114, 96),
 21: ('note_off', 56, 103, 48),
 22: ('note_off', 56, 107, 96),
 23: ('note_off', 56, 109, 96),
 24: ('note_off', 56, 114, 96),
 25: ('note_off', 57, 103, 96),
 26: ('note_off', 57, 107, 64),
 27: ('note_off', 57, 107, 96),
 28: ('note_off', 57, 108, 96),
 29: ('note_off', 57, 109, 96),
 30: ('note_off', 57, 109, 160),
 31: ('note

In [183]:
# Split into subsequences
# TODO: Play with sequence lengths (for both input and outputs)
seq_length = 10
data_input = [] # "X"
data_output = [] # "y"
for i in range(num_note_events-seq_length):
    seq_input = note_events[i:i+seq_length]
    seq_output = note_events[i+seq_length]
    data_input.append([note_to_int[note] for note in seq_input])
    data_output.append(note_to_int[seq_output])
num_seqs = len(data_input)
print("{} sequences".format(num_seqs))
print("{} ==> {}".format(data_input[0], data_output[0]))
data_input[:5]

1050 sequences
[587, 173, 561, 137, 492, 40, 527, 91, 565, 146] ==> 495


[[587, 173, 561, 137, 492, 40, 527, 91, 565, 146],
 [173, 561, 137, 492, 40, 527, 91, 565, 146, 495],
 [561, 137, 492, 40, 527, 91, 565, 146, 495, 45],
 [137, 492, 40, 527, 91, 565, 146, 495, 45, 488],
 [492, 40, 527, 91, 565, 146, 495, 45, 488, 35]]

In [184]:
# Reshape input sequences into form [samples, time steps, features]
X = np.reshape(data_input, (num_seqs, seq_length, 1))

# Normalize to 0-1 range
X = X / float(num_unique_notes)

# Convert output to one-hot encoding
y = keras.utils.np_utils.to_categorical(data_output)

In [185]:
print(X[0])
print("==>")
print(y[0])

[[ 0.72919255]
 [ 0.21490683]
 [ 0.69689441]
 [ 0.17018634]
 [ 0.61118012]
 [ 0.04968944]
 [ 0.65465839]
 [ 0.11304348]
 [ 0.70186335]
 [ 0.18136646]]
==>
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  

### Define the LSTM model

In [186]:
# Remembering what our shape is
"X.shape = {}, y.shape = {}".format(X.shape, y.shape)

'X.shape = (1050, 10, 1), y.shape = (1050, 805)'

In [187]:
# Define the model
model = keras.models.Sequential()
model.add(keras.layers.LSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.LSTM(256))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(y.shape[1], activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer="adam")

In [188]:
# Setup checkpoints
checkpoint_path = "weights_{epoch:02d}_{loss:.4f}.hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(checkpoint_path, monitor="loss", verbose=1, save_best_only=True, mode="min")
callbacks = [checkpoint]

In [189]:
# Fit the model (i.e. train the network)!
# TODO: Play with these parameters, of course
num_epochs = 100
batch_size = 32
model.fit(X, y, epochs=num_epochs, batch_size=batch_size, callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1236347f0>

### Generate output notes

In [190]:
# Load network weights and recompile
weights_filename = "weights_99_0.9724.hdf5" # Using only note ons
weights_filename = "weights_99_1.3571.hdf5" # Using both note ons and note offs
model.load_weights(weights_filename)
model.compile(loss="categorical_crossentropy", optimizer="adam")
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 10, 256)           264192    
_________________________________________________________________
dropout_5 (Dropout)          (None, 10, 256)           0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 805)               206885    
Total params: 996,389
Trainable params: 996,389
Non-trainable params: 0
_________________________________________________________________


In [191]:
# Start with a random seed
seq_in = data_input[np.random.randint(num_seqs)]
[int_to_note[i] for i in seq_in]

[('note_on', 82, 117, 0),
 ('note_off', 82, 117, 96),
 ('note_on', 84, 118, 0),
 ('note_off', 84, 118, 96),
 ('note_on', 86, 117, 0),
 ('note_off', 86, 117, 384),
 ('note_on', 91, 110, 528),
 ('note_off', 91, 110, 288),
 ('note_on', 89, 115, 480),
 ('note_off', 89, 115, 96)]

In [196]:
# Generate the notes!
num_notes_to_generate = 100
notes_out = []

for i in range(num_notes_to_generate):
    # Reshape and normalize
    x = np.reshape(seq_in, (1, len(seq_in), 1)) # Reshape
    x = x / float(num_unique_notes) # Normalize
    
    # Make the prediction
    pred = model.predict(x, batch_size=batch_size, verbose=0)
    
    # Get output note
    note_idx = np.argmax(pred)
    note = int_to_note[note_idx]
    
    # Add output note to list
    notes_out.append(note)
    
    # Add output note to input sequence, and move forward by one note
    seq_in.append(note_idx) 
    seq_in = seq_in[1:len(seq_in)]

notes_out[:20]

[('note_on', 63, 106, 0),
 ('note_off', 66, 114, 96),
 ('note_off', 74, 113, 96),
 ('note_off', 74, 113, 96),
 ('note_off', 70, 110, 96),
 ('note_off', 62, 114, 64),
 ('note_off', 55, 114, 96),
 ('note_off', 67, 112, 96),
 ('note_on', 57, 109, 0),
 ('note_off', 57, 109, 160),
 ('note_on', 51, 108, 0),
 ('note_on', 53, 111, 576),
 ('note_on', 58, 113, 0),
 ('note_on', 55, 106, 0),
 ('note_on', 61, 113, 0),
 ('note_on', 61, 113, 0),
 ('note_off', 55, 107, 96),
 ('note_off', 70, 115, 192),
 ('note_off', 75, 117, 288),
 ('note_off', 66, 114, 96)]

In [208]:
# Convert the sequence of note tuples into a sequence of MIDI notes

# Create MIDI file and track
midi_file_out = mido.MidiFile()
midi_track_out = mido.MidiTrack()
midi_file_out.tracks.append(midi_track_out)

# Append "headers" (track name, tempo, key, time signature)
for message in midi_track[:4]:
    midi_track_out.append(message)

# Add notes
prev_time = 0
prev_note = 0
for note in notes_out:
    #curr_time = prev_time + note[2]
    #prev_note = note[0]
    #prev_time = curr_time
    #message_noteoff = mido.Message("note_off", note=prev_note, velocity=0, time=curr_time) # Prev note off
    #message_noteon = mido.Message("note_on", note=note[0], velocity=note[1], time=curr_time) # Curr note on
    #midi_track_out.append(message_noteoff)
    #midi_track_out.append(message_noteon)
    
    #curr_time = prev_time + note[3] if note[0]=="note_on" else prev_time
    curr_time = prev_time + note[3]
    prev_time = curr_time
    message = mido.Message(note[0], note=note[1], velocity=note[2], time=curr_time)
    midi_track_out.append(message)
    
# Save file to disk
filename_out = str(datetime.datetime.now())
filename_out = re.sub("\W+", "", filename_out)
filename_out = "../data/out_{}.mid".format(filename_out)
midi_file_out.save(filename_out)

for message in midi_track_out[4:100]:
    print(message)

note_on channel=0 note=63 velocity=106 time=0
note_off channel=0 note=66 velocity=114 time=96
note_off channel=0 note=74 velocity=113 time=192
note_off channel=0 note=74 velocity=113 time=288
note_off channel=0 note=70 velocity=110 time=384
note_off channel=0 note=62 velocity=114 time=448
note_off channel=0 note=55 velocity=114 time=544
note_off channel=0 note=67 velocity=112 time=640
note_on channel=0 note=57 velocity=109 time=640
note_off channel=0 note=57 velocity=109 time=800
note_on channel=0 note=51 velocity=108 time=800
note_on channel=0 note=53 velocity=111 time=1376
note_on channel=0 note=58 velocity=113 time=1376
note_on channel=0 note=55 velocity=106 time=1376
note_on channel=0 note=61 velocity=113 time=1376
note_on channel=0 note=61 velocity=113 time=1376
note_off channel=0 note=55 velocity=107 time=1472
note_off channel=0 note=70 velocity=115 time=1664
note_off channel=0 note=75 velocity=117 time=1952
note_off channel=0 note=66 velocity=114 time=2048
note_off channel=0 not