# Generating melody *and* harmony using LSTMs

Again, using data from the Weimar Jazz Database.

First, let's import the `jazzaiexperiments` module:

In [1]:
import jazzaiexperiments

Using TensorFlow backend.


Next, we'll pick a tune:

In [2]:
tune_name = "ColemanHawkins_BodyAndSoul_FINAL"

### Approach #1: Add underlying harmony directly to note events

The functions defined in this section will eventually be migrated to the `jazzaiexperiments` module.

In [3]:
import mido
import pandas as pd

In [5]:
# Testing + building jazzaiexperiments functions
# Based on lstm_train_on_midi_input()

# Import the helpers by name rather than with `import *`, so it is explicit
# which names come from the module and which are (re)defined in this notebook.
# This list covers every module helper that the cells below call unqualified.
from jazzaiexperiments import (
    lstm_create,
    lstm_fit_model,
    lstm_load_weights,
    lstm_setup_callbacks,
    midi_construct_input_filepath,
    midi_create_note_events,
    midi_create_note_set,
    midi_extract_note_pairs,
    midi_format_for_lstm,
    midi_get_note_event_keys,
    midi_load_melody_from_file,
    midi_normalize_velocities,
    midi_split_subsequences,
)

# Set variables (that will be passed in as arguments)
tune_name = "ColemanHawkins_BodyAndSoul_FINAL"
midi_data_dir = "../data/midi/quantized/"
checkpoints_data_dir = "../data/models/"
seq_length = 10
num_epochs = 1

# Create note events
input_filepath = midi_construct_input_filepath(tune_name, midi_data_dir)
midi_track = midi_load_melody_from_file(input_filepath)
note_pairs = midi_extract_note_pairs(midi_track)
note_pairs = midi_normalize_velocities(note_pairs, interval=10)
note_events = midi_create_note_events(note_pairs)
print("Created note events from {}".format(input_filepath))

# Format note data to feed into network
note_set = midi_create_note_set(note_events)
seqs_input, seqs_output = midi_split_subsequences(note_events,
                                                  seq_length=seq_length)
num_seqs = len(seqs_input)
# Re-derive the sequence length from the data that was actually produced
seq_length = len(seqs_input[0])
num_unique_notes = len(note_set)
x, y = midi_format_for_lstm(seqs_input, seqs_output,
                            num_seqs=num_seqs,
                            seq_length=seq_length,
                            num_unique_notes=num_unique_notes)
print("Formatted note data ({} seqs of length {}, "
      "{} unique notes)".format(num_seqs, seq_length, num_unique_notes))

Created note events from ../data/midi/quantized/ColemanHawkins_BodyAndSoul_FINAL.mid
Formatted note data (625 seqs of length 10, 365 unique notes)


In [6]:
def midi_note_event_to_dict(note):
    """Label each field of a note event with its key name.

    The key names come from ``midi_get_note_event_keys()`` and are matched to
    the fields of ``note`` by position.

    Args:
        note: An indexable note event (e.g. a tuple of note fields).

    Returns:
        dict: Mapping of key name -> the field of ``note`` at the same position.
    """
    field_names = midi_get_note_event_keys()
    labelled = {}
    for position, _ in enumerate(note):
        key = field_names[position]
        labelled[key] = note[position]
    return labelled

midi_note_event_to_dict(note_events[0])

{'noteoff_time': 16170,
 'noteon_pitch': 51,
 'noteon_time': 76230,
 'noteon_velocity': 100}

In [7]:
[msg for msg in midi_track if "note" not in msg.type]

[<meta message track_name name='Melody' time=0>,
 <meta message set_tempo tempo=631578 time=0>,
 <meta message key_signature key='Db' time=0>,
 <meta message time_signature numerator=4 denominator=4 clocks_per_click=24 notated_32nd_notes_per_beat=8 time=0>,
 <meta message end_of_track time=0>]

In [8]:
midi_track[5]

<message note_off channel=0 note=51 velocity=100 time=16170>

In [9]:
mf = mido.MidiFile(input_filepath)
mf.ticks_per_beat

27720

In [10]:
# Calculate cumulative times (in seconds) of the note messages in a MIDI file
def calculate_note_times_seconds(input_filepath):
    """Return the running time, in seconds, at each note message of the melody.

    Every message whose type contains "note" (so both note_on and note_off)
    contributes its delta time, and the running total after each such message
    is recorded.

    Args:
        input_filepath: Path to the MIDI file to read.

    Returns:
        list of float: Cumulative time in seconds, one entry per note message,
        in track order.
    """
    midi_file = mido.MidiFile(input_filepath)
    midi_track = midi_load_melody_from_file(input_filepath)

    # Find the tempo by message type rather than by a fixed position in the
    # track, and fall back to the MIDI default (500000 us per beat = 120 BPM)
    # when the track carries no set_tempo message.
    # NOTE(review): only the first set_tempo is used; tempo changes later in
    # the track are not accounted for.
    tempo = next((msg.tempo for msg in midi_track if msg.type == "set_tempo"),
                 500000)
    ppq = midi_file.ticks_per_beat

    # Keep a running total instead of re-summing a growing slice per message.
    # NOTE(review): delta times of non-note messages are skipped, as before;
    # that is only exact if those messages have time == 0 -- confirm.
    note_times_summed = []
    elapsed = 0
    for msg in midi_track:
        if "note" in msg.type:
            elapsed += mido.tick2second(msg.time, ppq, tempo)
            note_times_summed.append(elapsed)
    return note_times_summed

calculate_note_times_seconds(input_filepath)[:10]

[1.7368395,
 2.10526,
 2.10526,
 2.315786,
 2.315786,
 3.3157845000000004,
 3.3157845000000004,
 3.473679,
 3.473679,
 3.6315735]

Actually, let's get the current chord directly from the database. Here I've exported the **beats** and **melody** tables from the database using two simple queries:

    SELECT * FROM beats WHERE melid=96
    
and

    SELECT * FROM melody WHERE melid=96
    
Here, 96 is the melody ID (`melid`) for Coleman Hawkins' "Body and Soul".

In [11]:
len(note_events)

635

In [12]:
def db_read_file(filepath):
    """Read a CSV file (e.g. a database table export) into a DataFrame.

    Args:
        filepath: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The parsed contents of the CSV.
    """
    return pd.read_csv(filepath)

db_read_file("../data/db/ColemanHawkins_BodyAndSoul_FINAL_beats.csv").head()

Unnamed: 0,beatid,melid,onset,bar,beat,signature,chord,form,bass_pitch,chorus_id
0,24777,96,8.440748,0,3,,NC,I1,51,0
1,24778,96,9.035805,0,4,,,,51,0
2,24779,96,9.807823,1,1,4/4,Eb-,A1,34,1
3,24780,96,10.452993,1,2,,,,42,1
4,24781,96,11.128458,1,3,,D+7,,41,1


In [13]:
db_read_file("../data/db/ColemanHawkins_BodyAndSoul_FINAL_melody.csv").head()

Unnamed: 0,eventid,melid,onset,pitch,duration,period,division,bar,beat,tatum,...,f0_mod,loud_max,loud_med,loud_sd,loud_relpos,loud_cent,loud_s2b,f0_range,f0_freq_hz,f0_med_dev
0,41757,96,8.939683,51.0,0.278639,4,4,0,3,4,...,,0.552351,62.516859,2.913798,0.24,0.368501,1.181904,21.915053,7.412672,-11.070815
1,41758,96,9.218322,51.0,0.290249,4,3,0,4,2,...,,0.370392,59.980021,1.694662,0.481481,0.443516,1.180483,38.993741,4.401113,-17.066446
2,41759,96,9.508571,51.0,0.371519,4,3,0,4,3,...,,0.432227,58.90674,3.292181,0.441176,0.450475,1.104257,87.635138,4.641182,-32.029418
3,41760,96,10.582494,51.0,0.191565,4,4,1,2,2,...,,0.720836,64.23841,3.861087,0.058824,0.400048,1.193654,88.286175,5.556956,1.528574
4,41761,96,10.808889,53.0,0.145102,4,4,1,2,3,...,,0.530749,61.655251,2.110308,0.153846,0.489615,1.168982,101.227998,7.177734,12.858254


In [14]:
data_beats.dropna?

Object `data_beats.dropna` not found.


In [15]:
def db_get_harmony_for_melody(beats_filepath, melody_filepath):
    """Find the chord in effect at each melody event.

    For every row of the melody CSV, the chord is taken from the last beat row
    (in file order) that has a non-null ``chord`` and whose ``onset`` is
    strictly earlier than the melody event's ``onset``. Melody events with no
    such beat get "NC".

    Args:
        beats_filepath: Path to the CSV export of the beats table; must have
            ``onset`` and ``chord`` columns.
        melody_filepath: Path to the CSV export of the melody table; must have
            an ``onset`` column.

    Returns:
        list: One chord label per melody row, in melody row order.
    """
    data_beats = db_read_file(beats_filepath)
    data_melody = db_read_file(melody_filepath)

    # Only beats that carry a chord label matter. Filter once, outside the
    # loop, and build the onset mask from this same filtered frame so pandas
    # does not have to reindex a mask made from the unfiltered frame.
    # NOTE(review): "most recent" relies on the beats being ordered by onset
    # in the file -- confirm for other exports.
    chord_beats = data_beats.dropna(subset=["chord"])

    chords = []
    for melody_onset in data_melody["onset"]:
        # Chords of all labelled beats that came before the current note
        preceding = chord_beats.loc[chord_beats.onset < melody_onset, "chord"]
        chords.append(preceding.iloc[-1] if len(preceding) > 0 else "NC")
    return chords
        
beats_filepath = "../data/db/ColemanHawkins_BodyAndSoul_FINAL_beats.csv"
melody_filepath = "../data/db/ColemanHawkins_BodyAndSoul_FINAL_melody.csv"
chords = db_get_harmony_for_melody(beats_filepath, melody_filepath)
chords[:10]

  import sys


['NC', 'NC', 'NC', 'Eb-', 'Eb-', 'Eb-', 'D+7', 'D+7', 'D+7', 'D+7']

In [16]:
# This will be a variant of midi_create_note_events()
def midi_create_note_events_harmony(note_pairs, chords):
    """Build note events that also carry a chord label.

    Args:
        note_pairs: Iterable of ``(note_on, note_off)`` message pairs. The
            ``note``, ``velocity`` and ``time`` attributes of ``note_on`` and
            the ``time`` attribute of ``note_off`` are read.
        chords: Indexable collection of chord labels; ``chords[i]`` is attached
            to the i-th note pair.

    Returns:
        list of tuple: ``(note, velocity, note_on time, note_off time, chord)``
        for each note pair, in order.
    """
    events_with_harmony = []
    for idx, pair in enumerate(note_pairs):
        msg_on, msg_off = pair
        event = (msg_on.note, msg_on.velocity,
                 msg_on.time, msg_off.time,
                 chords[idx])
        events_with_harmony.append(event)
    return events_with_harmony

note_events = midi_create_note_events_harmony(note_pairs, chords)
note_events[:10]

[(51, 100, 76230, 16170, 'NC'),
 (51, 90, 0, 9240, 'NC'),
 (51, 90, 0, 43890, 'NC'),
 (51, 100, 0, 6930, 'Eb-'),
 (53, 90, 0, 6930, 'Eb-'),
 (54, 110, 0, 6930, 'Eb-'),
 (53, 100, 0, 4620, 'D+7'),
 (54, 100, 0, 4620, 'D+7'),
 (53, 90, 0, 39270, 'D+7'),
 (51, 90, 0, 13860, 'D+7')]

In [17]:
# We already did this in db_get_harmony_for_melody() and midi_create_note_events_harmony()
# def midi_add_harmony_to_notes(note_events):
#     # TODO
#     # So we'll need the note events but also the chordal info. From a CSV export from the database itself??
#     pass

Now let's put it all back into our function:

In [29]:
# def lstm_train_with_harmony():

def lstm_train_on_midi_input(tune_name,
                             midi_data_dir="../data/midi/",
                             checkpoints_data_dir="../data/models/",
                             input_filepath=None,
                             weights_filepath=None,
                             db_beats_filepath=None,
                             db_melody_filepath=None,
                             seq_length=10,
                             num_epochs=100,
                             mode="single_melody"):
    """Build and train an LSTM from an input MIDI file.

    Args:
        tune_name: Name of the tune; used to construct the MIDI input path
            (when ``input_filepath`` is None) and passed to the callback setup.
        midi_data_dir: Directory used to construct the MIDI input path.
        checkpoints_data_dir: Directory passed to ``lstm_setup_callbacks``.
        input_filepath: Explicit MIDI file path; overrides the constructed one.
        weights_filepath: If given, weights are loaded from this file instead
            of training the model.
        db_beats_filepath: CSV export of the beats table. Required only when
            ``mode`` is "single_melody_harmony".
        db_melody_filepath: CSV export of the melody table. Required only when
            ``mode`` is "single_melody_harmony".
        seq_length: Length of the input subsequences fed to the network.
        num_epochs: Number of training epochs (ignored when loading weights).
        mode: "single_melody" for plain note events, or
            "single_melody_harmony" to attach a chord label to each note event.

    Returns:
        tuple: ``(model, note_events, input_filepath)``.

    Raises:
        ValueError: If ``mode`` is not one of the supported modes, or if
            harmony mode is requested without both database CSV paths.
    """
    # Validate the mode up front so a typo fails with a clear message instead
    # of producing an empty event list that crashes further down.
    supported_modes = ("single_melody", "single_melody_harmony")
    if mode not in supported_modes:
        raise ValueError("Unknown mode {!r}; expected one of {}".format(
            mode, supported_modes))
    if mode == "single_melody_harmony" and (db_beats_filepath is None
                                            or db_melody_filepath is None):
        raise ValueError("mode 'single_melody_harmony' requires both "
                         "db_beats_filepath and db_melody_filepath")

    # Load MIDI file
    if input_filepath is None:
        input_filepath = midi_construct_input_filepath(tune_name,
                                                       midi_data_dir)
    midi_track = midi_load_melody_from_file(input_filepath)
    note_pairs = midi_extract_note_pairs(midi_track)
    note_pairs = midi_normalize_velocities(note_pairs, interval=10)

    # Create note events. Harmony data is read only when the mode needs it, so
    # the default melody-only mode keeps working without any database CSVs.
    if mode == "single_melody":
        note_events = midi_create_note_events(note_pairs)
    else:
        chords = db_get_harmony_for_melody(db_beats_filepath,
                                           db_melody_filepath)
        note_events = midi_create_note_events_harmony(note_pairs, chords)
    print("Created {} note events from {} using mode {}".format(len(note_events), input_filepath, mode))

    # Format note data to feed into network
    note_set = midi_create_note_set(note_events)
    seqs_input, seqs_output = midi_split_subsequences(note_events,
                                                      seq_length=seq_length)
    num_seqs = len(seqs_input)
    # Re-derive the sequence length from the data that was actually produced
    seq_length = len(seqs_input[0])
    num_unique_notes = len(note_set)
    x, y = midi_format_for_lstm(seqs_input, seqs_output,
                                num_seqs=num_seqs,
                                seq_length=seq_length,
                                num_unique_notes=num_unique_notes)
    print("Formatted note data ({} seqs of length {}, "
          "{} unique notes)".format(num_seqs, seq_length, num_unique_notes))

    # Create LSTM
    model = lstm_create(x.shape, y.shape, num_units=256, dropout_rate=0.2)
    print("Created model")

    # Train LSTM, or load from weights
    if weights_filepath is None:
        callbacks = lstm_setup_callbacks(tune_name, checkpoints_data_dir)
        model = lstm_fit_model(model, x, y,
                               num_epochs=num_epochs,
                               batch_size=32,
                               callbacks=callbacks)
        print("Trained model over {} epochs".format(num_epochs))
    else:
        model = lstm_load_weights(model, weights_filepath)
        print("Loaded weights from {}".format(weights_filepath))

    return (model, note_events, input_filepath)
    
# # Set variables (that will be passed in as arguments)
# tune_name = "ColemanHawkins_BodyAndSoul_FINAL"
# midi_data_dir = "../data/midi/quantized/"
# checkpoints_data_dir = "../data/models/"
# input_filepath = None
# beats_filepath = "../data/db/ColemanHawkins_BodyAndSoul_FINAL_beats.csv"
# melody_filepath = "../data/db/ColemanHawkins_BodyAndSoul_FINAL_melody.csv"
# weights_filepath = None
# seq_length = 10
# num_epochs = 1
# mode = "single_melody_harmony"
# #    

# Train for a single epoch in harmony mode, then unpack the returned
# (model, note_events, input_filepath) triple for the cells below.
trained = lstm_train_on_midi_input(
    tune_name="ColemanHawkins_BodyAndSoul_FINAL",
    midi_data_dir="../data/midi/quantized/",
    db_beats_filepath="../data/db/ColemanHawkins_BodyAndSoul_FINAL_beats.csv",
    db_melody_filepath="../data/db/ColemanHawkins_BodyAndSoul_FINAL_melody.csv",
    seq_length=10,
    num_epochs=1,
    mode="single_melody_harmony",
)
model, note_events, input_filepath = trained

  import sys


Created 635 note events from ../data/midi/quantized/ColemanHawkins_BodyAndSoul_FINAL.mid using mode single_melody_harmony
Formatted note data (625 seqs of length 10, 581 unique notes)
Created model
Epoch 1/1
Trained model over 1 epochs


In [30]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_17 (LSTM)               (None, 10, 256)           264192    
_________________________________________________________________
dropout_17 (Dropout)         (None, 10, 256)           0         
_________________________________________________________________
lstm_18 (LSTM)               (None, 256)               525312    
_________________________________________________________________
dropout_18 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 581)               149317    
Total params: 938,821
Trainable params: 938,821
Non-trainable params: 0
_________________________________________________________________


In [31]:
note_events[:10]

[(51, 100, 76230, 16170, 'NC'),
 (51, 90, 0, 9240, 'NC'),
 (51, 90, 0, 43890, 'NC'),
 (51, 100, 0, 6930, 'Eb-'),
 (53, 90, 0, 6930, 'Eb-'),
 (54, 110, 0, 6930, 'Eb-'),
 (53, 100, 0, 4620, 'D+7'),
 (54, 100, 0, 4620, 'D+7'),
 (53, 90, 0, 39270, 'D+7'),
 (51, 90, 0, 13860, 'D+7')]

### Training + generation from previous notebook

Let's build and train an LSTM model from a MIDI file:

In [None]:
# model, note_events, input_filepath = jazzaiexperiments.lstm_train_on_midi_input(tune_name,
#                                                                                 "../data/midi/quantized/",
#                                                                                 "../data/models/",
#                                                                                 num_epochs=10)
# model.summary()

This is some example output for 10 epochs (it's not very good, and basically gets stuck in a two-to-three-note loop): https://soundcloud.com/usdivad/jazz-ai-experiments-lstm-single-melody-coleman-hawkins-body-and-soul-10-epochs

We've actually trained this model before, so let's load some existing weights (from 100 epochs of training) into our current model:

In [None]:
# model = jazzaiexperiments.lstm_load_weights(model, "../data/models/weights_ColemanHawkins_BodyAndSoul_FINAL_20170702124647869274_99_0.8517.hdf5")
# model.summary()

Note that we can also do this by passing a `weights_filepath` argument to the `lstm_train_on_midi_input` function when creating the model.

And let's generate some output!

In [None]:
# notes_out = jazzaiexperiments.lstm_generate_midi_output(model, note_events,
#                                                         num_notes_to_generate=100,
#                                                         random_seed=False,
#                                                         add_seed_to_output=True,
#                                                         tune_name=tune_name,
#                                                         midi_source_filepath=input_filepath,
#                                                         data_dir="../data/output")
# notes_out[:20]

Example output for 100 epochs: https://soundcloud.com/usdivad/jazz-ai-experiments-lstm-single-melody-coleman-hawkins-body-and-soul-100-epochs