Download the Bach chorales dataset and unzip it. It is composed of 382 chorales composed by Johann Sebastian Bach. Each chorale is 100 to 640 time steps long, and each time step contains 4 integers, where each integer corresponds to a note’s index on a piano (except for the value 0, which means that no note is played). Train a model—recurrent, convolutional, or both—that can predict the next time step (four notes), given a sequence of time steps from a chorale. Then use this model to generate Bach-like music, one note at a time: you can do this by giving the model the start of a chorale and asking it to predict the next time step, then appending these time steps to the input sequence and asking the model for the next note, and so on. Also make sure to check out Google’s Coconet model, which was used for a nice Google doodle about Bach.

Approach:
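- Given a window of 16 consecutive time steps, predict the next 4 time steps at every position in the window (sequence-to-sequence), so each target has shape (16, 4 steps ahead, 4 notes)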

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [2]:
import tensorrt
import tensorflow as tf
import numpy as np
from pathlib import Path

In [35]:
from IPython.display import Audio

def notes_to_frequencies(notes):
    """Convert note numbers to frequencies in Hz.

    Note number 69 is the A of octave 4 (440 Hz). An octave spans 12
    semi-tones and doubles the frequency, so every semi-tone multiplies the
    frequency by 2 ** (1 / 12).

    Args:
        notes: A note number or any array-like of note numbers.

    Returns:
        A NumPy array (same shape as `notes`) of frequencies in Hz.
    """
    semitones_from_a4 = np.array(notes) - 69
    octaves_from_a4 = semitones_from_a4 / 12
    return 2 ** octaves_from_a4 * 440

def frequencies_to_samples(frequencies, tempo, sample_rate):
    """Render a sequence of frequencies as back-to-back sine-wave notes.

    Args:
        frequencies: NumPy array of frequencies in Hz, one per beat.
        tempo: Tempo in beats per minute; each note lasts one beat.
        sample_rate: Number of audio samples per second.

    Returns:
        A 1-D NumPy array holding the samples of all notes, concatenated.
    """
    beat_seconds = 60 / tempo
    # Snap every frequency to a whole number of cycles per beat so that each
    # sine wave ends close to zero, which reduces the click between notes.
    snapped = (beat_seconds * frequencies).round() / beat_seconds
    samples_per_beat = int(beat_seconds * sample_rate)
    t = np.linspace(0, beat_seconds, samples_per_beat)
    waves = np.sin(2 * np.pi * snapped.reshape(-1, 1) * t)
    # Mute every note at or below 9 Hz; this covers note 0, i.e. silence.
    audible = (snapped > 9.).reshape(-1, 1)
    waves *= audible
    return waves.reshape(-1)

def chords_to_samples(chords, tempo, sample_rate):
    """Mix a sequence of chords into a single mono waveform.

    Args:
        chords: Array-like of note numbers; rows are iterated as time steps
            and columns as voices.
        tempo: Tempo in beats per minute.
        sample_rate: Number of audio samples per second.

    Returns:
        A 1-D NumPy array of samples: the average of all voices, with the
        final beat faded out.
    """
    chord_freqs = notes_to_frequencies(chords)
    # Repeat the final row so that the last note lasts a bit longer.
    chord_freqs = np.r_[chord_freqs, chord_freqs[-1:]]
    voices = [frequencies_to_samples(voice, tempo, sample_rate)
              for voice in chord_freqs.T]
    mix = np.mean(voices, axis=0)
    # Quadratic fade-out applied over the last beat.
    fade_len = sample_rate * 60 // tempo
    mix[-fade_len:] *= np.linspace(1., 0., fade_len)**2
    return mix

def play_chords(chords, tempo=160, amplitude=0.1, sample_rate=44100, filepath=None):
    """Synthesize chords and display an audio player for them.

    Args:
        chords: Array-like of note numbers, forwarded to `chords_to_samples`.
        tempo: Tempo in beats per minute.
        amplitude: Factor applied to the synthesized samples.
        sample_rate: Number of audio samples per second.
        filepath: When given, the audio is also written to this path as a
            16-bit WAV file and the player is created from that file.

    Returns:
        Whatever `display` returns for the audio widget.
    """
    samples = amplitude * chords_to_samples(chords, tempo, sample_rate)
    if not filepath:
        return display(Audio(samples, rate=sample_rate))

    from scipy.io import wavfile
    # Scale to the int16 range before writing the WAV file.
    pcm = (2**15 * samples).astype(np.int16)
    wavfile.write(filepath, sample_rate, pcm)
    return display(Audio(filepath))

In [3]:
def to_windows(dataset: tf.data.Dataset, length):
    """Turn a dataset into overlapping windows of `length` consecutive elements.

    Windows slide by one element, and incomplete windows at the end are
    dropped. Each window is batched into a single element.
    """
    def _as_single_element(window_ds):
        return window_ds.batch(length)

    windows = dataset.window(length, shift=1, drop_remainder=True)
    return windows.flat_map(_as_single_element)

def to_seq2seq_dataset(tensor, seq_length=16, ahead=4):
    """Build (inputs, targets) sequence-to-sequence pairs from one tensor.

    The tensor is sliced along its first axis, grouped into windows of
    `ahead + 1` consecutive slices, and those windows are in turn grouped
    into windows of `seq_length`. For every position in the outer window the
    first slice is the input and the remaining `ahead` slices are the target.

    Returns:
        A dataset of `(S[:, 0], S[:, 1:])` pairs.
    """
    def _split_inputs_targets(S):
        return S[:, 0], S[:, 1:]

    steps = tf.data.Dataset.from_tensor_slices(tensor)
    lookahead = to_windows(steps, ahead + 1)
    return to_windows(lookahead, seq_length).map(_split_inputs_targets)

In [38]:
def process_file(file_name):
    """Read a comma-separated file of integers into an int64 tensor.

    The first line is discarded (presumably a header row); every following
    line becomes one row of integers.
    """
    with open(file_name) as f:
        f.readline()  # discard the first line
        notes = [[int(x) for x in line.split(',')] for line in f]
    return tf.constant(notes, dtype=tf.int64)

def load_files(folder, shuffle=False, batch_size=32):
    """Load every CSV file of a folder into one batched seq2seq dataset.

    Each file is parsed with `process_file`, windowed with
    `to_seq2seq_dataset`, and the per-file datasets are concatenated so that
    no window ever spans two files.

    Args:
        folder: Directory containing the `*.csv` files.
        shuffle: Whether to shuffle the windows (buffer of 8 batches).
        batch_size: Number of windows per batch; incomplete batches are dropped.

    Returns:
        A batched, prefetched dataset of (inputs, targets) pairs.

    Raises:
        FileNotFoundError: If the folder contains no CSV file.
    """
    # Sort the paths: glob order depends on the file system, and a stable
    # order keeps the resulting dataset reproducible across machines.
    csv_files = sorted(Path(folder).glob('*.csv'))
    if not csv_files:
        raise FileNotFoundError(f'No CSV files found in {folder}')

    ds = None
    for csv_file in csv_files:
        file_ds = to_seq2seq_dataset(process_file(csv_file))
        # Compare against None explicitly instead of relying on the
        # truthiness of a Dataset object.
        ds = file_ds if ds is None else ds.concatenate(file_ds)
    if shuffle:
        ds = ds.shuffle(8 * batch_size)
    return ds.batch(batch_size, drop_remainder=True).prefetch(1)

# Build one batched dataset per split; only the training split is shuffled.
root_dir = Path('data') / 'jsb_chorales'
training_ds = load_files(root_dir / 'train', shuffle=True)
valid_ds = load_files(root_dir / 'valid', shuffle=False)
test_ds = load_files(root_dir / 'test', shuffle=False)

In [53]:
# Print and play the distinct chords found in the first 16 time steps of one
# chorale, skipping a chord when it is identical to the previous one.
last_chord = None
for x in process_file(root_dir / 'train/chorale_000.csv').numpy()[:16]:
    ser_x = str(x)
    if ser_x != last_chord:
        print(ser_x)
        # play_chords works on a (time steps, voices) array. Passing the bare
        # 1-D chord would make chords_to_samples duplicate the last voice
        # instead of the last time step, so wrap it as a one-step sequence.
        play_chords(x[np.newaxis, :])
        last_chord = ser_x

[74 70 65 58]


[75 70 58 55]


[75 70 60 55]


[77 69 62 50]


[77 70 62 55]


[77 69 62 55]


In [5]:
# Sanity check: print the input and target shapes of the first batch of
# every split.
for split_ds in (training_ds, valid_ds, test_ds):
    for x, y in split_ds.take(1):
        print(x.shape, y.shape)

(32, 16, 4) (32, 16, 4, 4)
(32, 16, 4) (32, 16, 4, 4)
(32, 16, 4) (32, 16, 4, 4)


In [6]:
# Disabled by default: scans every split to find the lowest and highest
# non-zero note. Flip the guard to True to run it.
if False:
    # Named min_note/max_note so the builtins min() and max() are not
    # shadowed; int64 matches the dtype produced by process_file.
    min_note = tf.Variable(100, dtype=tf.int64)
    max_note = tf.Variable(0, dtype=tf.int64)
    # Dataset elements are (inputs, targets) pairs, so scan both tensors.
    for inputs, targets in training_ds.concatenate(valid_ds).concatenate(test_ds):
        for batch in (inputs, targets):
            flat = tf.reshape(batch, [-1])
            # Note 0 means silence and must not count as the lowest note.
            played = tf.boolean_mask(flat, tf.not_equal(flat, 0))
            min_note.assign(tf.minimum(tf.reduce_min(played), min_note))
            max_note.assign(tf.maximum(tf.reduce_max(played), max_note))
        print('.', end='')

    print(min_note.numpy(), max_note.numpy())

In [7]:
def first(x, _):
    """Return the first argument and ignore the second.

    Used with `Dataset.map` to keep only the inputs of (inputs, targets)
    pairs.
    """
    return x

# Multi-hot encoder for note numbers. Its vocabulary is learned from the
# inputs of all three splits.
lookup = tf.keras.layers.IntegerLookup(output_mode='multi_hot')
all_inputs = training_ds.map(first)
for split_ds in (valid_ds, test_ds):
    all_inputs = all_inputs.concatenate(split_ds.map(first))
lookup.adapt(all_inputs)

In [14]:
# Inputs: apply the multi-hot lookup independently at every time step.
preprocess_inputs = tf.keras.layers.TimeDistributed(lookup)

# Labels: flatten the look-ahead axis into the time axis, encode each chord
# with the same layer as the inputs, then restore the look-ahead axis.
preprocess_labels = tf.keras.Sequential()
preprocess_labels.add(tf.keras.layers.Reshape([-1, 4]))
preprocess_labels.add(preprocess_inputs)
# 4 is the "ahead"
preprocess_labels.add(tf.keras.layers.Reshape([-1, 4, lookup.vocabulary_size()]))

def preprocess(x, y):
    """Encode a batch of inputs and labels with the preprocessing layers."""
    encoded_inputs = preprocess_inputs(x)
    encoded_labels = preprocess_labels(y)
    return encoded_inputs, encoded_labels

def downsample(steps):
    """Return a map function that drops the first `steps` entries of axis 1 of y.

    The returned function takes `(x, y)` and returns `(x, y[:, steps:])`;
    `x` is passed through untouched.
    """
    return lambda x, y: (x, y[:, steps:])

# Encode every split. The validation set must come from valid_ds: building it
# from training_ds would make validation loss, early stopping and checkpoint
# selection all run on training data.
train = training_ds.map(preprocess) # .map(downsample(6))
valid = valid_ds.map(preprocess) # .map(downsample(6))
test = test_ds.map(preprocess) # .map(downsample(6))

In [15]:
# Sanity check: print the shapes of the first encoded batch of every split.
for encoded_ds in (train, valid, test):
    for x, y in encoded_ds.take(1):
        print(x.shape, y.shape)

(32, 16, 48) (32, 16, 4, 48)
(32, 16, 48) (32, 16, 4, 48)
(32, 16, 48) (32, 16, 4, 48)


In [18]:
def print_shape(name):
    """Return an identity function that prints the shape of its argument.

    The printed line is prefixed with `PS-<name>`. Meant to be wrapped in a
    `Lambda` layer while debugging the model.
    """
    label = f'PS-{name}'

    def _print_shape(x):
        tf.print(label, x.shape)
        return x

    return _print_shape

# Two stacked GRUs over a window of 16 multi-hot encoded chords. At every time
# step the second GRU emits 4 * 128 features, which are reshaped into 4
# look-ahead slots of 128 features; the Dense layer then scores every
# vocabulary entry independently (sigmoid) for each slot.
model = tf.keras.Sequential([
    # The feature width comes from the lookup layer, exactly like the output
    # layer below, so both stay in sync if the vocabulary changes.
    tf.keras.layers.Input(shape=(16, lookup.vocabulary_size())),
    # tf.keras.layers.Lambda(print_shape('A')),
    # tf.keras.layers.Conv1D(64, kernel_size=1),
    # tf.keras.layers.Lambda(print_shape('B')),
    # tf.keras.layers.Conv1D(128, kernel_size=1),
    # tf.keras.layers.Lambda(print_shape('C')),
    tf.keras.layers.GRU(128, return_sequences=True),
    tf.keras.layers.GRU(4 * 128, return_sequences=True),
    # tf.keras.layers.Lambda(print_shape('D')),
    tf.keras.layers.Reshape([-1, 4, 128]),
    # tf.keras.layers.Lambda(print_shape('E')),
    tf.keras.layers.Dense(lookup.vocabulary_size(), activation='sigmoid'),
    # tf.keras.layers.Lambda(print_shape('F')),
])

def top_k_categorical_accuracy(y_true, y_pred):
    """Top-4 categorical accuracy, wrapping the Keras metric with k fixed to 4.

    NOTE(review): the labels used in this notebook are multi-hot; confirm
    that the Keras metric treats such labels the way you expect.
    """
    # Flattening to [-1, vocabulary] was tried for debugging and is not needed:
    # y_true = tf.reshape(y_true, shape=[-1, y_true.shape[-1]])
    # y_pred = tf.reshape(y_pred, shape=[-1, y_pred.shape[-1]])
    accuracy = tf.keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=4)
    return accuracy

def binary_crossentropy(y_true, y_pred):
    """Binary cross-entropy loss; a thin wrapper around the Keras function."""
    # Flattening to [-1, vocabulary] was tried for debugging and is not needed:
    # y_true = tf.reshape(y_true, shape=[-1, y_true.shape[-1]])
    # y_pred = tf.reshape(y_pred, shape=[-1, y_pred.shape[-1]])
    loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    return loss


# Output locations for checkpoints and TensorBoard logs.
model_root_dir = Path('data') / '02-chorales-rnn'
checkpoints_dir = model_root_dir / 'checkpoints'
tensorboard_dir = model_root_dir / 'tensorboard'
print(f'tensorboard --logdir=ch15/{tensorboard_dir}')

# Callbacks: stop after 3 epochs without improvement and restore the best
# weights, save only the best model, and log to TensorBoard.
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    patience=3,
    restore_best_weights=True,
)
checkpoints_cb = tf.keras.callbacks.ModelCheckpoint(
    checkpoints_dir,
    save_best_only=True,
)
tensorboard_cb = tf.keras.callbacks.TensorBoard(tensorboard_dir)

nadam = tf.keras.optimizers.Nadam()
model.compile(optimizer=nadam, loss=binary_crossentropy,
              metrics=[top_k_categorical_accuracy])

tensorboard --logdir=ch15/data/02-chorales-rnn/tensorboard


In [19]:
# Train for at most 5 epochs, validating after each one; the callbacks handle
# early stopping, checkpointing and TensorBoard logging.
model.fit(
    train,
    epochs=5,
    validation_data=valid,
    callbacks=[early_stop_cb, checkpoints_cb, tensorboard_cb],
)

Epoch 1/5
   1587/Unknown - 34s 18ms/step - loss: 0.1991 - top_k_categorical_accuracy: 0.6915



INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


Epoch 2/5



INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


Epoch 3/5



INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


Epoch 4/5



INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


Epoch 5/5



INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets


INFO:tensorflow:Assets written to: data/02-chorales-rnn/checkpoints/assets




<keras.callbacks.History at 0x7ff933529610>

In [33]:
# Running totals filled in by evaluate_model.
sample = tf.Variable(0, dtype=tf.int64)
correct = tf.Variable(0, dtype=tf.int64)

def evaluate_model(model: tf.keras.Model, test):
    """Measure top-4 accuracy of the immediate next-step prediction.

    For every batch only the last time step of the window and the first
    look-ahead slot are compared (`[:, -1, 0]`). Results are accumulated in
    the module-level `correct` and `sample` variables, so the accuracy is
    `correct / sample`.

    Args:
        model: The trained Keras model.
        test: A dataset of preprocessed (inputs, labels) batches.
    """
    # Start from zero so calling this function again does not mix in the
    # totals of a previous run.
    sample.assign(0)
    correct.assign(0)
    for x, y in test:
        y_true = y[:, -1, 0]
        # predict_on_batch avoids the per-call overhead and progress output
        # of model.predict when called in a loop over batches.
        y_hat = model.predict_on_batch(x)[:, -1, 0]
        top_k = tf.metrics.top_k_categorical_accuracy(y_true, y_hat, 4)
        correct.assign_add(tf.cast(tf.reduce_sum(top_k), dtype=tf.int64))
        sample.assign_add(tf.cast(tf.shape(top_k)[0], dtype=tf.int64))

evaluate_model(model, test)
correct / sample



<tf.Tensor: shape=(), dtype=float64, numpy=0.8421415441176471>

<tf.Variable 'Variable:0' shape=() dtype=int64, numpy=17408>