# Jazz melody generation using LSTMs

This notebook uses data from the Weimar Jazz Database and is based on Jason Brownlee's LSTM text generation tutorial.

Currently this only takes in a single MIDI file containing the melody track; further notebooks will explore multiple MIDI files, harmony mappings, and who knows what else!

### Imports

In [165]:
import datetime
import re

import h5py
import keras
import mido
import numpy as np

### Load the data

In [216]:
# Load the data: a single MIDI file read with mido.
# The unquantized variant is kept below (commented out) for reference; the quantized file is the one in use.
# midi_file = mido.MidiFile("../data/midi/ArtPepper_Anthropology_FINAL.mid") # Unquantized
midi_file = mido.MidiFile("../data/midi_quantized/ArtPepper_Anthropology_FINAL.mid") # Quantized
# Only the first track of the file is used from here on
midi_track = midi_file.tracks[0]

[<message note_on channel=0 note=65 velocity=104 time=0>,
 <message note_off channel=0 note=65 velocity=104 time=192>,
 <message note_on channel=0 note=63 velocity=109 time=0>,
 <message note_off channel=0 note=63 velocity=109 time=144>,
 <message note_on channel=0 note=58 velocity=103 time=0>,
 <message note_off channel=0 note=58 velocity=103 time=48>,
 <message note_on channel=0 note=61 velocity=104 time=0>,
 <message note_off channel=0 note=61 velocity=104 time=192>,
 <message note_on channel=0 note=63 velocity=114 time=0>,
 <message note_off channel=0 note=63 velocity=114 time=192>]

### Clean the data

In [226]:
# Keep only the note_on / note_off messages; every other message type in the track is dropped
midi_notes = [msg for msg in midi_track if msg.type in ("note_on", "note_off")]
len(midi_notes)
# Preview the first ten note messages
midi_notes[:10]

[<message note_on channel=0 note=65 velocity=104 time=0>,
 <message note_off channel=0 note=65 velocity=104 time=192>,
 <message note_on channel=0 note=63 velocity=109 time=0>,
 <message note_off channel=0 note=63 velocity=109 time=144>,
 <message note_on channel=0 note=58 velocity=103 time=0>,
 <message note_off channel=0 note=58 velocity=103 time=48>,
 <message note_on channel=0 note=61 velocity=104 time=0>,
 <message note_off channel=0 note=61 velocity=104 time=192>,
 <message note_on channel=0 note=63 velocity=114 time=0>,
 <message note_off channel=0 note=63 velocity=114 time=192>]

In [232]:
# len([msg for msg in midi_track if msg.type=="note_on" and msg.time>0])

In [238]:
# Pair each note_on with the message immediately after it, but only when that
# neighbour is a note_off for the same pitch; any other adjacent pair is skipped
midi_note_pairs = [
    (first, second)
    for first, second in zip(midi_notes, midi_notes[1:])
    if first.type == "note_on" and second.type == "note_off" and first.note == second.note
]
len(midi_note_pairs)

530

In [239]:
# Bucket note_on velocities by rounding each one down to a multiple of 10.
# This edits the message objects in place; note_off velocities are left as they are.
# TODO: Play with normalizing other parameters
for note_on, note_off in midi_note_pairs:
    note_on.velocity -= note_on.velocity % 10
# Show the distinct note_on velocities that remain
{pair[0].velocity for pair in midi_note_pairs}

{70, 80, 90, 100, 110, 120}

In [240]:
# Preview the first ten (note_on, note_off) pairs
midi_note_pairs[:10]

[(<message note_on channel=0 note=65 velocity=100 time=0>,
  <message note_off channel=0 note=65 velocity=104 time=192>),
 (<message note_on channel=0 note=63 velocity=100 time=0>,
  <message note_off channel=0 note=63 velocity=109 time=144>),
 (<message note_on channel=0 note=58 velocity=100 time=0>,
  <message note_off channel=0 note=58 velocity=103 time=48>),
 (<message note_on channel=0 note=61 velocity=100 time=0>,
  <message note_off channel=0 note=61 velocity=104 time=192>),
 (<message note_on channel=0 note=63 velocity=110 time=0>,
  <message note_off channel=0 note=63 velocity=114 time=192>),
 (<message note_on channel=0 note=58 velocity=100 time=0>,
  <message note_off channel=0 note=58 velocity=106 time=192>),
 (<message note_on channel=0 note=58 velocity=90 time=0>,
  <message note_off channel=0 note=58 velocity=98 time=384>),
 (<message note_on channel=0 note=50 velocity=90 time=1344>,
  <message note_off channel=0 note=50 velocity=90 time=192>),
 (<message note_on channel

In [241]:
# Flatten every note on/off pair into one 5-field tuple and collect the sorted vocabulary
# of distinct tuples. note_events_keys names the fields in tuple order.
note_events_keys = (
    "noteon_pitch", "noteon_velocity", "noteon_time",
    "noteoff_velocity", "noteoff_time",
)
note_events = [
    (on.note, on.velocity, on.time, off.velocity, off.time)
    for on, off in midi_note_pairs
]

# Each distinct tuple counts as one "note" in the vocabulary
note_set = sorted(set(note_events))
num_note_events = len(note_events)
num_unique_notes = len(note_set)
print("{} unique notes in note set (vs. {} note events in MIDI file)".format(num_unique_notes, num_note_events))
note_set[:10]

462 unique notes in note set (vs. 530 note events in MIDI file)


[(50, 90, 1344, 90, 192),
 (50, 100, 576, 102, 192),
 (51, 100, 0, 106, 112),
 (51, 100, 0, 107, 384),
 (51, 100, 0, 108, 96),
 (52, 110, 0, 110, 192),
 (53, 90, 0, 98, 96),
 (53, 100, 0, 101, 64),
 (53, 100, 0, 106, 96),
 (53, 100, 0, 109, 384)]

In [242]:
# len([note for note in note_set if note[0] == "note_off"])

In [243]:
# Map each note tuple to its position in the sorted note set
note_to_int = {note: index for index, note in enumerate(note_set)}
note_to_int

{(50, 90, 1344, 90, 192): 0,
 (50, 100, 576, 102, 192): 1,
 (51, 100, 0, 106, 112): 2,
 (51, 100, 0, 107, 384): 3,
 (51, 100, 0, 108, 96): 4,
 (52, 110, 0, 110, 192): 5,
 (53, 90, 0, 98, 96): 6,
 (53, 100, 0, 101, 64): 7,
 (53, 100, 0, 106, 96): 8,
 (53, 100, 0, 109, 384): 9,
 (53, 110, 576, 111, 48): 10,
 (54, 100, 0, 102, 160): 11,
 (54, 100, 0, 107, 96): 12,
 (55, 90, 0, 97, 64): 13,
 (55, 100, 0, 102, 64): 14,
 (55, 100, 0, 102, 256): 15,
 (55, 100, 0, 103, 144): 16,
 (55, 100, 0, 106, 64): 17,
 (55, 100, 0, 107, 96): 18,
 (55, 110, 0, 111, 48): 19,
 (55, 110, 528, 114, 96): 20,
 (56, 100, 0, 103, 48): 21,
 (56, 100, 0, 107, 96): 22,
 (56, 100, 0, 109, 96): 23,
 (56, 110, 0, 114, 96): 24,
 (57, 100, 0, 103, 96): 25,
 (57, 100, 0, 107, 64): 26,
 (57, 100, 0, 107, 96): 27,
 (57, 100, 0, 108, 96): 28,
 (57, 100, 0, 109, 96): 29,
 (57, 100, 0, 109, 160): 30,
 (57, 110, 0, 112, 192): 31,
 (57, 110, 0, 113, 96): 32,
 (57, 110, 0, 115, 96): 33,
 (58, 90, 0, 96, 96): 34,
 (58, 90, 0, 98, 3

In [244]:
# Inverse lookup: position in the sorted note set -> note tuple (needed in the generation phase)
int_to_note = dict(enumerate(note_set))
int_to_note

{0: (50, 90, 1344, 90, 192),
 1: (50, 100, 576, 102, 192),
 2: (51, 100, 0, 106, 112),
 3: (51, 100, 0, 107, 384),
 4: (51, 100, 0, 108, 96),
 5: (52, 110, 0, 110, 192),
 6: (53, 90, 0, 98, 96),
 7: (53, 100, 0, 101, 64),
 8: (53, 100, 0, 106, 96),
 9: (53, 100, 0, 109, 384),
 10: (53, 110, 576, 111, 48),
 11: (54, 100, 0, 102, 160),
 12: (54, 100, 0, 107, 96),
 13: (55, 90, 0, 97, 64),
 14: (55, 100, 0, 102, 64),
 15: (55, 100, 0, 102, 256),
 16: (55, 100, 0, 103, 144),
 17: (55, 100, 0, 106, 64),
 18: (55, 100, 0, 107, 96),
 19: (55, 110, 0, 111, 48),
 20: (55, 110, 528, 114, 96),
 21: (56, 100, 0, 103, 48),
 22: (56, 100, 0, 107, 96),
 23: (56, 100, 0, 109, 96),
 24: (56, 110, 0, 114, 96),
 25: (57, 100, 0, 103, 96),
 26: (57, 100, 0, 107, 64),
 27: (57, 100, 0, 107, 96),
 28: (57, 100, 0, 108, 96),
 29: (57, 100, 0, 109, 96),
 30: (57, 100, 0, 109, 160),
 31: (57, 110, 0, 112, 192),
 32: (57, 110, 0, 113, 96),
 33: (57, 110, 0, 115, 96),
 34: (58, 90, 0, 96, 96),
 35: (58, 90, 0, 9

In [245]:
# Slide a fixed-length window over the note events: each window of seq_length
# encoded notes is one input, and the note right after the window is its target.
# TODO: Play with sequence lengths (for both input and outputs)
seq_length = 10
data_input, data_output = [], []  # "X" windows and their "y" targets
for i in range(num_note_events - seq_length):
    seq_input = note_events[i:i + seq_length]
    seq_output = note_events[i + seq_length]
    data_input.append([note_to_int[event] for event in seq_input])
    data_output.append(note_to_int[seq_output])
num_seqs = len(data_input)
print("{} sequences".format(num_seqs))
print("{} ==> {}".format(data_input[0], data_output[0]))
data_input[:5]

520 sequences
[173, 134, 40, 90, 146, 45, 35, 0, 32, 79] ==> 53


[[173, 134, 40, 90, 146, 45, 35, 0, 32, 79],
 [134, 40, 90, 146, 45, 35, 0, 32, 79, 53],
 [40, 90, 146, 45, 35, 0, 32, 79, 53, 16],
 [90, 146, 45, 35, 0, 32, 79, 53, 16, 44],
 [146, 45, 35, 0, 32, 79, 53, 16, 44, 94]]

In [246]:
# Reshape input sequences into form [samples, time steps, features]
X = np.reshape(data_input, (num_seqs, seq_length, 1))

# Scale the integer note indices by the vocabulary size
X = X / float(num_unique_notes)

# Convert output to one-hot encoding.
# num_classes is passed explicitly so y always has one column per note in the vocabulary;
# without it the width is inferred from the largest index present in data_output, which is
# too narrow whenever the highest-indexed note never occurs as a target.
# keras.utils.to_categorical is the public entry point (np_utils is an internal module path).
y = keras.utils.to_categorical(data_output, num_classes=num_unique_notes)

In [247]:
# Show the first training example: scaled input window, a separator, then its one-hot target
print(X[0], "==>", y[0], sep="\n")

[[ 0.37445887]
 [ 0.29004329]
 [ 0.08658009]
 [ 0.19480519]
 [ 0.31601732]
 [ 0.0974026 ]
 [ 0.07575758]
 [ 0.        ]
 [ 0.06926407]
 [ 0.17099567]]
==>
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  

### Define the LSTM model

In [248]:
# Remembering what our shape is: X was reshaped to (num_seqs, seq_length, 1); y is the one-hot output
"X.shape = {}, y.shape = {}".format(X.shape, y.shape)

'X.shape = (520, 10, 1), y.shape = (520, 462)'

In [249]:
# Define the model: two stacked 256-unit LSTM layers, each followed by 20% dropout,
# ending in a softmax layer with one output per column of y
model = keras.models.Sequential([
    # First LSTM returns the full sequence so the second LSTM receives one vector per time step
    keras.layers.LSTM(256, input_shape=X.shape[1:], return_sequences=True),
    keras.layers.Dropout(0.2),
    keras.layers.LSTM(256),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(y.shape[1], activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy")

In [250]:
# Setup checkpoints: write the weights to disk whenever the monitored training loss
# improves, with the epoch number and loss value embedded in the file name
checkpoint_path = "weights_{epoch:02d}_{loss:.4f}.hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor="loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)
callbacks = [checkpoint]

In [251]:
# Train the network, passing the checkpoint callback list defined earlier
# TODO: Play with these parameters, of course
num_epochs = 100
batch_size = 32
model.fit(X, y, batch_size=batch_size, epochs=num_epochs, callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x126eb5d30>

### Generate output notes

In [252]:
# Load network weights and recompile
# Checkpoints from earlier experiments, kept for reference (not loaded):
#   weights_99_0.9724.hdf5 -- using only note ons
#   weights_99_1.3571.hdf5 -- using both note ons and note offs
# Only one checkpoint is actually loaded, so it is the only assignment kept.
weights_filename = "weights_95_1.4241.hdf5" # Using note on/off pairs
model.load_weights(weights_filename)
model.compile(loss="categorical_crossentropy", optimizer="adam")
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_7 (LSTM)                (None, 10, 256)           264192    
_________________________________________________________________
dropout_7 (Dropout)          (None, 10, 256)           0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dropout_8 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 462)               118734    
Total params: 908,238
Trainable params: 908,238
Non-trainable params: 0
_________________________________________________________________


In [253]:
# Start with a random seed: pick one of the training input windows at random.
# Take a copy of the window: seq_in is extended in place during generation, and
# without the copy that edit would land on the row stored in data_input.
seq_in = list(data_input[np.random.randint(num_seqs)])
# Show the seed as note tuples
[int_to_note[i] for i in seq_in]

[(64, 100, 0, 106, 96),
 (62, 100, 0, 107, 48),
 (63, 100, 0, 104, 48),
 (67, 110, 0, 110, 144),
 (66, 100, 0, 101, 96),
 (64, 100, 0, 100, 48),
 (65, 100, 0, 109, 96),
 (61, 100, 0, 105, 192),
 (65, 100, 0, 105, 96),
 (70, 110, 0, 114, 288)]

In [265]:
# Decode the seed indices into note tuples, then show the first one as a
# field-name -> value mapping
seq_in_notes = [int_to_note[idx] for idx in seq_in]
dict(zip(note_events_keys, seq_in_notes[0]))

{'noteoff_time': 96,
 'noteoff_velocity': 106,
 'noteon_pitch': 64,
 'noteon_time': 0,
 'noteon_velocity': 100}

In [266]:
# Generate the notes!
# Repeatedly predict the next note from the current window, record it, and slide
# the window forward by one so the prediction becomes part of the next input.
num_notes_to_generate = 100
notes_out = []

for i in range(num_notes_to_generate):
    # Reshape to [samples, time steps, features] and scale exactly as the training input was
    x = np.reshape(seq_in, (1, len(seq_in), 1))
    x = x / float(num_unique_notes)

    # Make the prediction
    pred = model.predict(x, batch_size=batch_size, verbose=0)

    # Pick the highest-scoring class and decode it back into a note tuple
    note_idx = int(np.argmax(pred))
    note = int_to_note[note_idx]

    # Add output note to list
    notes_out.append(note)

    # Slide the window by building a NEW list. Appending in place would, on the first
    # iteration, modify the seed list itself, which is a row of data_input.
    seq_in = seq_in[1:] + [note_idx]

notes_out[:20]

[(67, 100, 0, 104, 48),
 (63, 100, 0, 104, 48),
 (60, 100, 0, 105, 48),
 (68, 100, 0, 108, 96),
 (68, 100, 0, 103, 96),
 (72, 110, 0, 112, 64),
 (65, 110, 672, 114, 144),
 (70, 110, 0, 111, 960),
 (61, 100, 0, 105, 96),
 (63, 100, 0, 103, 128),
 (61, 90, 0, 94, 64),
 (67, 110, 0, 112, 96),
 (55, 100, 0, 102, 64),
 (58, 100, 0, 107, 96),
 (58, 100, 0, 107, 96),
 (65, 100, 0, 106, 96),
 (74, 110, 0, 116, 144),
 (62, 110, 0, 117, 48),
 (53, 100, 0, 101, 64),
 (77, 110, 0, 118, 128)]

In [271]:
# Convert the sequence of note tuples into a sequence of MIDI notes, and then write to MIDI file

# Create MIDI file and track. The source file's tick resolution is reused so that the
# tick values copied from the note tuples keep the same musical length in the output
# (a MidiFile created without it falls back to mido's own default resolution).
midi_file_out = mido.MidiFile(ticks_per_beat=midi_file.ticks_per_beat)
midi_track_out = mido.MidiTrack()
midi_file_out.tracks.append(midi_track_out)

# Append "headers": the first four messages of the source track are copied over as-is
# NOTE(review): presumably these are track name, tempo, key and time signature -- confirm for other input files
for message in midi_track[:4]:
    midi_track_out.append(message)

# Add notes as note on/off pairs.
# The times stored in each tuple are written to the messages unchanged.
for note in notes_out:
    fields = dict(zip(note_events_keys, note))
    message_noteon = mido.Message("note_on", note=fields["noteon_pitch"],
                                  velocity=fields["noteon_velocity"], time=fields["noteon_time"])
    # The note_off reuses the note_on pitch; the tuple stores no separate note_off pitch
    message_noteoff = mido.Message("note_off", note=fields["noteon_pitch"],
                                   velocity=fields["noteoff_velocity"], time=fields["noteoff_time"])
    midi_track_out.append(message_noteon)
    midi_track_out.append(message_noteoff)

# Save file to disk, named after the current timestamp with every non-word character removed
filename_out = str(datetime.datetime.now())
filename_out = re.sub(r"\W+", "", filename_out)  # raw string: "\W" is not a valid str escape
filename_out = "../data/out_{}.mid".format(filename_out)
midi_file_out.save(filename_out)

# Print the first generated messages, skipping the four copied header messages
for message in midi_track_out[4:20]:
    print(message)

note_on channel=0 note=67 velocity=100 time=0
note_off channel=0 note=67 velocity=104 time=48
note_on channel=0 note=63 velocity=100 time=0
note_off channel=0 note=63 velocity=104 time=48
note_on channel=0 note=60 velocity=100 time=0
note_off channel=0 note=60 velocity=105 time=48
note_on channel=0 note=68 velocity=100 time=0
note_off channel=0 note=68 velocity=108 time=96
note_on channel=0 note=68 velocity=100 time=0
note_off channel=0 note=68 velocity=103 time=96
note_on channel=0 note=72 velocity=110 time=0
note_off channel=0 note=72 velocity=112 time=64
note_on channel=0 note=65 velocity=110 time=672
note_off channel=0 note=65 velocity=114 time=144
note_on channel=0 note=70 velocity=110 time=0
note_off channel=0 note=70 velocity=111 time=960
