In [85]:
%load_ext autoreload
%autoreload 2

import os
import datetime as dt

import numpy as np
import matplotlib.pyplot as plt
from dataloaders import load_data, make_loaders, append_SOS

import sys
sys.path.insert(0, '/scratch/users/udemir15/ELEC491/bassline_transcription')
from utilities import *

from bassline_transcriber.transcription import NN_output_to_MIDI_file, replace_sustain
from tensorflow.keras.metrics import SparseCategoricalAccuracy

SEED = 27

np.random.seed(SEED)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [52]:
with open('/scratch/users/udemir15/ELEC491/bassline_transcription/data/metadata/train_misc_names.txt', 'r') as infile:
    train_misc_names = infile.read().split('\n')
with open('/scratch/users/udemir15/ELEC491/bassline_transcription/data/metadata/val_misc_names.txt', 'r') as infile:
    val_misc_names = infile.read().split('\n')

In [73]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from tensorflow.keras.models import Sequential

In [143]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

class ResidualBlock(layers.Layer):
    def __init__(self, channel, downsample=True):
        super().__init__()
        strides = 2 if downsample else 1
        self.layers = Sequential([layers.Conv1D(channel, 4, activation=None, padding="same", use_bias=False),
                        layers.BatchNormalization(),
                        layers.Activation("relu"),
                        layers.Conv1D(channel, 4, activation=None, padding="same", strides=strides, use_bias=False),
                        layers.BatchNormalization()])
        
        self.shortcut = Sequential([
            layers.Conv1D(channel, 1, activation=None, padding="same", strides=strides, use_bias = False),
            layers.BatchNormalization()])
            
    def call(self, x):
        x = self.layers(x)
        h = self.shortcut(x)
        x = layers.add([h,x])
        x = layers.Activation("relu")(x)
        return x
    
class ResidualStack(layers.Layer):
    def __init__(self, channel, n_blocks=3, downsample=True):
        super().__init__()
        self.layers = Sequential([ResidualBlock(channel, downsample=False)]*(n_blocks-1) +[ResidualBlock(channel, downsample)])
        
    def call(self, data):
        return self.layers(data)
    
class Encoder(layers.Layer):
    
    def __init__(self, embedding_size, embedding_dim, latent_dim):
        
        super().__init__()
        
        self.embedding = layers.Embedding(embedding_size, embedding_dim)
    
        self.layer = Sequential([ResidualStack(32),
                                 ResidualStack(64),
                                 ResidualStack(128),
                                layers.Flatten(),
                                layers.Dense(16, activation="relu")])
               
        self.mean_layer = layers.Dense(latent_dim, name="z_mean")
        self.var_layer = layers.Dense(latent_dim, name="z_log_var")
        
        self.sampling = Sampling()
        
    def call(self, x):
        
        x = self.embedding(x)
        
        x = self.layer(x)
        
        z_mean = self.mean_layer(x)
        z_log_var = self.var_layer(x)
        
        z = self.sampling([z_mean, z_log_var])
        
        return z_mean, z_log_var, z 

class Decoder(layers.Layer):
    
    def __init__(self, input_, output_dim):
        
        super().__init__()
        self.layers = Sequential([layers.Dense(input_*8, activation="relu"),
                layers.Reshape((8, input_)),
                layers.Conv1DTranspose(128, 4, activation="relu", strides=2, padding="same"),
                ResidualStack(128, downsample=False),
                layers.Conv1DTranspose(64, 4, activation="relu", strides=1, padding="same"),
                ResidualStack(64, downsample=False),
                layers.Conv1DTranspose(32, 4, activation="relu", strides=1, padding="same"),
                ResidualStack(32, downsample=False),
                layers.Conv1DTranspose(output_dim, 4, activation="softmax", padding="same")])
        
    def call(self, x):        
        return self.layers(x)

In [144]:
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)        
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        self.accuracy_tracker = keras.metrics.Mean(name="acc")
        
        self.acc_fn = SparseCategoricalAccuracy()

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
            self.accuracy_tracker,
        ]
    
    def call(self, data):
        z_mean, z_log_var, z = self.encoder(data)
        reconstruction = self.decoder(z)
        return z_mean, z_log_var, z, reconstruction

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z, reconstruction = self(data)
            reconstruction_loss = tf.reduce_mean(
               keras.losses.SparseCategoricalCrossentropy()(tf.cast(data, dtype=tf.float32), reconstruction)
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        acc=self.acc_fn(data, reconstruction)
        self.accuracy_tracker.update_state(acc)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
            "acc": self.accuracy_tracker.result()
        }

In [145]:
M = 8

data_params = {'dataset_path': '/scratch/users/udemir15/ELEC491/bassline_transcription/data/datasets/[28, 51]',
               'dataset_name': 'TechHouse_bassline_representations_4020',
               'scale_type': 'min',
               'M': M}

X, titles = load_data(data_params, False)

X = replace_sustain(X, 25)
X[X<9] += 12
X[X>=21] -= 12
X -= 9

X = X[[title not in val_misc_names+train_misc_names for title in titles]]
K = int(X.max()+1) # Number of classes, assumes consecutive [0,max] inclusive
X = X.reshape(-1, 16, 1)
sequence_length = X.shape[1]

print('Number of classes: {}\nSequence Length: {}'.format(K, sequence_length))
print('Number of data points: {}'.format(X.shape[0]))

Number of classes: 12
Sequence Length: 16
Number of data points: 14480


In [146]:
encoder = Encoder(K, 128, 4096)
decoder = Decoder(64, K)

model = VAE(encoder, decoder)
model.compile(optimizer=keras.optimizers.Adam())
model(X[:1])
model.summary()
model.fit(X, epochs=300, batch_size=16)

Model: "vae_34"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
encoder_35 (Encoder)         multiple                  537360    
_________________________________________________________________
decoder_33 (Decoder)         multiple                  2565612   
Total params: 3,102,982
Trainable params: 3,097,596
Non-trainable params: 5,386
_________________________________________________________________
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/3

Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300


Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300

KeyboardInterrupt: 