# Jazz melody generation using LSTM RNNs

This notebook uses data from the Weimar Jazz Database and is based on Jason Brownlee's LSTM text generation tutorial.

Currently this only takes in a single MIDI file containing the melody track; further notebooks will explore multiple MIDI files, harmony mappings, and who knows what else!

### Imports

In [165]:
import datetime
import re

import h5py
import keras
import mido
import numpy as np

### Load the data

In [62]:
# Load the quantized transcription. (An unquantized variant of the same solo
# lives at "../data/midi/ArtPepper_Anthropology_FINAL.mid".)
midi_file = mido.MidiFile("../data/midi_quantized/ArtPepper_Anthropology_FINAL.mid")

# Only the first track of the file is used.
midi_track = midi_file.tracks[0]

# Keep the note-on messages only; every other message type is discarded.
midi_notes = list(filter(lambda msg: msg.type == "note_on", midi_track))
len(midi_notes)
midi_notes[:10]

[<message note_on channel=0 note=65 velocity=104 time=0>,
 <message note_on channel=0 note=63 velocity=109 time=0>,
 <message note_on channel=0 note=58 velocity=103 time=0>,
 <message note_on channel=0 note=61 velocity=104 time=0>,
 <message note_on channel=0 note=63 velocity=114 time=0>,
 <message note_on channel=0 note=58 velocity=106 time=0>,
 <message note_on channel=0 note=58 velocity=98 time=0>,
 <message note_on channel=0 note=50 velocity=90 time=1344>,
 <message note_on channel=0 note=57 velocity=113 time=0>,
 <message note_on channel=0 note=60 velocity=110 time=0>]

### Format the data

In [65]:
# Round every velocity down to a multiple of 10 (in place), so that notes which
# differ only slightly in velocity collapse onto the same value.
# TODO: Play with normalizing other parameters
for note in midi_notes:
    note.velocity = (note.velocity // 10) * 10
# Show which velocity values remain after rounding
{msg.velocity for msg in midi_notes}

{70, 80, 90, 100, 110, 120}

In [74]:
# Build the vocabulary of distinct note events
note_events_keys = ("pitch", "velocity", "duration")
# One (note, velocity, time) tuple per note-on message, in track order.
# NOTE(review): the third field is the message's `time` attribute, which is
# labelled "duration" above -- confirm that this is the intended meaning.
note_events = [(msg.note, msg.velocity, msg.time) for msg in midi_notes]
# De-duplicate and sort; an event's position in this list is its integer id
note_set = sorted(set(note_events))
num_note_events, num_unique_notes = len(note_events), len(note_set)
print("{} unique notes in note set (vs. {} note events in MIDI file)".format(num_unique_notes, num_note_events))
note_set[:10]

136 unique notes in note set (vs. 530 note events in MIDI file)


[(50, 90, 1344),
 (50, 100, 576),
 (51, 100, 0),
 (52, 110, 0),
 (53, 90, 0),
 (53, 100, 0),
 (53, 110, 576),
 (54, 100, 0),
 (55, 90, 0),
 (55, 100, 0)]

In [75]:
# Lookup table: note-event tuple -> its index in the sorted note set
note_to_int = {event: idx for idx, event in enumerate(note_set)}
note_to_int

{(50, 90, 1344): 0,
 (50, 100, 576): 1,
 (51, 100, 0): 2,
 (52, 110, 0): 3,
 (53, 90, 0): 4,
 (53, 100, 0): 5,
 (53, 110, 576): 6,
 (54, 100, 0): 7,
 (55, 90, 0): 8,
 (55, 100, 0): 9,
 (55, 110, 0): 10,
 (55, 110, 528): 11,
 (56, 100, 0): 12,
 (56, 110, 0): 13,
 (57, 100, 0): 14,
 (57, 110, 0): 15,
 (58, 90, 0): 16,
 (58, 100, 0): 17,
 (58, 110, 0): 18,
 (59, 80, 0): 19,
 (59, 90, 384): 20,
 (59, 100, 0): 21,
 (60, 90, 0): 22,
 (60, 100, 0): 23,
 (60, 100, 576): 24,
 (60, 110, 0): 25,
 (61, 90, 0): 26,
 (61, 100, 0): 27,
 (61, 100, 624): 28,
 (61, 110, 0): 29,
 (62, 90, 0): 30,
 (62, 100, 0): 31,
 (62, 110, 0): 32,
 (62, 110, 912): 33,
 (63, 90, 0): 34,
 (63, 100, 0): 35,
 (63, 100, 576): 36,
 (63, 100, 624): 37,
 (63, 100, 768): 38,
 (63, 110, 0): 39,
 (64, 90, 0): 40,
 (64, 100, 0): 41,
 (64, 100, 864): 42,
 (64, 100, 960): 43,
 (64, 110, 0): 44,
 (65, 90, 0): 45,
 (65, 100, 0): 46,
 (65, 100, 1056): 47,
 (65, 110, 0): 48,
 (65, 110, 336): 49,
 (65, 110, 672): 50,
 (66, 90, 0): 51,
 

In [103]:
# Inverse lookup table: index -> note-event tuple (used when decoding the
# network's predictions during generation)
int_to_note = dict(enumerate(note_set))
int_to_note

{0: (50, 90, 1344),
 1: (50, 100, 576),
 2: (51, 100, 0),
 3: (52, 110, 0),
 4: (53, 90, 0),
 5: (53, 100, 0),
 6: (53, 110, 576),
 7: (54, 100, 0),
 8: (55, 90, 0),
 9: (55, 100, 0),
 10: (55, 110, 0),
 11: (55, 110, 528),
 12: (56, 100, 0),
 13: (56, 110, 0),
 14: (57, 100, 0),
 15: (57, 110, 0),
 16: (58, 90, 0),
 17: (58, 100, 0),
 18: (58, 110, 0),
 19: (59, 80, 0),
 20: (59, 90, 384),
 21: (59, 100, 0),
 22: (60, 90, 0),
 23: (60, 100, 0),
 24: (60, 100, 576),
 25: (60, 110, 0),
 26: (61, 90, 0),
 27: (61, 100, 0),
 28: (61, 100, 624),
 29: (61, 110, 0),
 30: (62, 90, 0),
 31: (62, 100, 0),
 32: (62, 110, 0),
 33: (62, 110, 912),
 34: (63, 90, 0),
 35: (63, 100, 0),
 36: (63, 100, 576),
 37: (63, 100, 624),
 38: (63, 100, 768),
 39: (63, 110, 0),
 40: (64, 90, 0),
 41: (64, 100, 0),
 42: (64, 100, 864),
 43: (64, 100, 960),
 44: (64, 110, 0),
 45: (65, 90, 0),
 46: (65, 100, 0),
 47: (65, 100, 1056),
 48: (65, 110, 0),
 49: (65, 110, 336),
 50: (65, 110, 672),
 51: (66, 90, 0),
 

In [78]:
# Cut the melody into overlapping training examples: each window of
# `seq_length` consecutive events is an input, and the event that follows the
# window is its target.
# TODO: Play with sequence lengths (for both input and outputs)
seq_length = 10

# Encode every event as its integer id once, then slice windows out of that
encoded_events = [note_to_int[event] for event in note_events]

data_input = []  # "X"
data_output = []  # "y"
for i in range(num_note_events - seq_length):
    window_end = i + seq_length
    data_input.append(encoded_events[i:window_end])
    data_output.append(encoded_events[window_end])

num_seqs = len(data_input)
print("{} sequences".format(num_seqs))
print("{} ==> {}".format(data_input[0], data_output[0]))
data_input[:5]

520 sequences
[46, 35, 17, 27, 39, 17, 16, 0, 15, 25] ==> 18


[[46, 35, 17, 27, 39, 17, 16, 0, 15, 25],
 [35, 17, 27, 39, 17, 16, 0, 15, 25, 18],
 [17, 27, 39, 17, 16, 0, 15, 25, 18, 9],
 [27, 39, 17, 16, 0, 15, 25, 18, 9, 17],
 [39, 17, 16, 0, 15, 25, 18, 9, 17, 27]]

In [84]:
# Reshape input sequences into form [samples, time steps, features]
# (one feature per time step: the integer id of the note event)
X = np.reshape(data_input, (num_seqs, seq_length, 1))

# Scale the integer ids by the vocabulary size so inputs lie in [0, 1)
X = X / float(num_unique_notes)

# Convert output to one-hot encoding.
# The width is pinned to the vocabulary size: left to infer it,
# to_categorical would use max(data_output) + 1, which is too narrow whenever
# the highest-numbered event never occurs as a target, and the output layer
# would then no longer line up with int_to_note.
y = keras.utils.to_categorical(data_output, num_classes=num_unique_notes)

In [91]:
# Show the first training example: scaled input window, then one-hot target
print(X[0], "==>", y[0], sep="\n")

[[ 0.33823529]
 [ 0.25735294]
 [ 0.125     ]
 [ 0.19852941]
 [ 0.28676471]
 [ 0.125     ]
 [ 0.11764706]
 [ 0.        ]
 [ 0.11029412]
 [ 0.18382353]]
==>
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


### Define the LSTM model

In [93]:
# Reminder of the array shapes that the network has to match
shape_report = "X.shape = {}, y.shape = {}".format(X.shape, y.shape)
shape_report

'X.shape = (520, 10, 1), y.shape = (520, 136)'

In [94]:
# IPython introspection: the trailing "?" displays the help for keras.layers.LSTM
keras.layers.LSTM?

In [98]:
# Define the model: two stacked 256-unit LSTM layers, each followed by 20%
# dropout, feeding a softmax layer with one output per column of y.
n_timesteps, n_features = X.shape[1], X.shape[2]
n_classes = y.shape[1]

model = keras.models.Sequential([
    # return_sequences=True so the second LSTM receives the full sequence
    keras.layers.LSTM(256, input_shape=(n_timesteps, n_features), return_sequences=True),
    keras.layers.Dropout(0.2),
    keras.layers.LSTM(256),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(n_classes, activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy")

In [96]:
# Checkpointing: write the weights to disk whenever the training loss reaches
# a new minimum. The epoch number and loss are embedded in the file name.
checkpoint_path = "weights_{epoch:02d}_{loss:.4f}.hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    monitor="loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)
callbacks = [checkpoint]

In [100]:
# IPython introspection: the trailing "?" displays the help for model.fit
model.fit?

In [135]:
# Train the network. The checkpoint callback saves weights along the way.
# TODO: Play with these parameters, of course
num_epochs = 100
batch_size = 32
model.fit(
    x=X,
    y=y,
    batch_size=batch_size,
    epochs=num_epochs,
    callbacks=callbacks,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x11fb8ccc0>

### Generate output notes

In [136]:
# Restore a saved checkpoint into the model, then recompile and print the
# layer summary.
# NOTE(review): the file name is hard-coded and has to match a checkpoint that
# a training run actually wrote -- update it after retraining.
weights_filename = "weights_99_0.9724.hdf5"
model.load_weights(weights_filename)
model.compile(optimizer="adam", loss="categorical_crossentropy")
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 10, 256)           264192    
_________________________________________________________________
dropout_3 (Dropout)          (None, 10, 256)           0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 136)               34952     
Total params: 824,456
Trainable params: 824,456
Non-trainable params: 0
_________________________________________________________________


In [137]:
# Start with a random seed: pick one of the training input windows.
# Take a COPY of the window -- the generation loop extends `seq_in`, and
# without the copy that would modify the chosen row of data_input itself.
seq_in = list(data_input[np.random.randint(num_seqs)])
# Decode the seed back to note tuples for display
[int_to_note[i] for i in seq_in]

[(72, 110, 0),
 (70, 110, 0),
 (66, 90, 0),
 (66, 100, 576),
 (70, 110, 0),
 (72, 110, 0),
 (73, 110, 0),
 (74, 110, 0),
 (75, 120, 0),
 (73, 110, 0)]

In [141]:
# Generate the notes!
# Starting from the seed window `seq_in`, repeatedly predict the next note
# event and slide the window forward by one step.
num_notes_to_generate = 100
notes_out = []

for i in range(num_notes_to_generate):
    # Reshape to (1, time steps, 1) and scale by the vocabulary size, the same
    # way the training input was prepared
    x = np.reshape(seq_in, (1, len(seq_in), 1))
    x = x / float(num_unique_notes)

    # Make the prediction
    pred = model.predict(x, batch_size=batch_size, verbose=0)

    # Get output note: take the index with the highest predicted score
    # (cast to a plain int so the window stays a list of Python ints)
    note_idx = int(np.argmax(pred))
    note = int_to_note[note_idx]

    # Add output note to list
    notes_out.append(note)

    # Slide the window: drop the oldest id and add the predicted one.
    # A new list is built on purpose; appending in place would modify the
    # list object the seed was taken from.
    seq_in = seq_in[1:] + [note_idx]

notes_out[:10]

[(70, 100, 0),
 (74, 110, 0),
 (79, 120, 0),
 (75, 120, 0),
 (78, 120, 288),
 (79, 110, 0),
 (81, 110, 0),
 (81, 110, 0),
 (81, 110, 0),
 (75, 100, 0)]

In [172]:
# Convert the sequence of note tuples into a sequence of MIDI notes

# Create MIDI file and track.
# Reuse the source file's ticks-per-beat resolution: the `time` values written
# below are tick counts taken from that file, so a different resolution would
# rescale every one of them.
midi_file_out = mido.MidiFile(ticks_per_beat=midi_file.ticks_per_beat)
midi_track_out = mido.MidiTrack()
midi_file_out.tracks.append(midi_track_out)

# Append "headers" (track name, tempo, key, time signature)
# NOTE(review): this assumes the first four messages of the source track are
# exactly those headers -- confirm for other input files.
for message in midi_track[:4]:
    # Copy, so the two tracks do not share message objects
    midi_track_out.append(message.copy())

# Add notes: each generated tuple is (note, velocity, time)
for note in notes_out:
    message = mido.Message("note_on", note=note[0], velocity=note[1], time=note[2])
    midi_track_out.append(message)

# Save file to disk, named after the current timestamp with every
# non-word character stripped out
filename_out = str(datetime.datetime.now())
# Raw string: "\W" in a normal string literal is an invalid escape sequence
filename_out = re.sub(r"\W+", "", filename_out)
filename_out = "../data/out_{}.mid".format(filename_out)
midi_file_out.save(filename_out)