In [1]:
# init imports

import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import (Dense, LSTM, ConvLSTM2D, Flatten)
import matplotlib.pyplot as plt

In [2]:
def view_midi(np_array):
    """Plot a 3-D array as one wide black-and-white image and return the figure.

    The first two axes of ``np_array`` are merged into a single axis, the
    result is transposed so that merged axis runs horizontally, and the image
    is flipped vertically so that index 0 of the last axis is drawn at the
    bottom.

    Args:
        np_array: NumPy array with exactly three axes
            (presumably (clips, frames, pitches/bins) -- TODO confirm).

    Returns:
        The matplotlib ``Figure`` that was drawn (20 x 10 inches).
    """
    rows = np_array.shape[0] * np_array.shape[1]
    flattened = np_array.reshape(rows, np_array.shape[2])
    # Transpose, then reverse the row order (same as np.flip(..., axis=0)).
    midi_array = flattened.T[::-1]

    f = plt.figure()
    f.set_size_inches(20, 10)
    plt.imshow(midi_array, cmap='binary', interpolation='None', aspect="auto")
    plt.show()
    return f

In [3]:
from tensorflow.python.client import device_lib
import sys
import GPUtil

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# print out system information
def get_available_devices():
    """Return the names of every device listed by ``device_lib.list_local_devices()``."""
    return [device.name for device in device_lib.list_local_devices()]

print ('system version: ', sys.version)
print('tensorflow version: ', tf.__version__)
print('available devices: ', get_available_devices())

# set gpu as device
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
if gpus:
    try:
        # Restrict TensorFlow to the first physical GPU only.
        tf.config.set_visible_devices(gpus[0], 'GPU')
    except RuntimeError as err:
        # Visible devices can only be changed before TensorFlow initialises
        # them; this happens when the cell is re-run in a live kernel.
        print(err)
else:
    # Indexing gpus[0] on an empty list would raise IndexError; fall back to CPU.
    print("No GPU found; running on CPU.")

# allow gpu growth
# allow_growth asks TensorFlow to grow its GPU memory allocation on demand
# instead of reserving it all up front.
# NOTE(review): this is the TF1-compat session mechanism; the TF2 equivalent is
# tf.config.experimental.set_memory_growth -- confirm this config still takes
# effect for the eager/Keras code below.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# get gpu info
GPUtil.showUtilization()

system version:  3.10.6 (tags/v3.10.6:9c7b4bd, Aug  1 2022, 21:53:49) [MSC v.1932 64 bit (AMD64)]
tensorflow version:  2.10.1
available devices:  ['/device:CPU:0', '/device:GPU:0']
Num GPUs Available:  1
| ID | GPU | MEM |
------------------
|  0 |  3% | 13% |


In [4]:
# model

def my_model(num_hidden):
    """Build the (uncompiled) ConvLSTM2D -> Dense classifier.

    Architecture, in order: a ConvLSTM2D layer with ``num_hidden`` filters and
    a 5x5 kernel, a Flatten, two ReLU Dense layers (256 and 64 units), and a
    9-unit sigmoid layer named ``'output'``.

    An earlier variant, previously kept as commented-out code, used a plain
    LSTM over inputs of shape (10, 128) instead of the ConvLSTM2D layer.

    Args:
        num_hidden: number of filters for the ConvLSTM2D layer.

    Returns:
        A ``tf.keras.Sequential`` model; the caller is expected to compile
        and build it.
    """
    layer_stack = [
        ConvLSTM2D(filters=num_hidden, kernel_size=(5, 5), strides=(1, 1)),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(64, activation='relu'),
        Dense(9, activation='sigmoid', name='output'),
    ]
    return tf.keras.Sequential(layer_stack)


In [5]:
# loss and optimizer

def macro_f1(y, y_hat, thresh=0.5):
    """Compute the macro F1-score on a batch of observations (average F1 across labels)

    Args:
        y (int32 Tensor): labels array of shape (BATCH_SIZE, N_LABELS)
        y_hat (float32 Tensor): probability matrix from forward propagation of shape (BATCH_SIZE, N_LABELS)
        thresh: probability value above which we predict positive

    Returns:
        macro_f1 (scalar Tensor): value of macro F1 for the batch
    """
    # Cast the labels the same way macro_soft_f1 does. Without this, the
    # float32 * int32 products below fail when the function is called directly
    # with the integer labels the docstring promises.
    y = tf.cast(y, tf.float32)
    y_pred = tf.cast(tf.greater(y_hat, thresh), tf.float32)

    # Per-label confusion counts, reduced over the batch axis.
    tp = tf.cast(tf.math.count_nonzero(y_pred * y, axis=0), tf.float32)
    fp = tf.cast(tf.math.count_nonzero(y_pred * (1 - y), axis=0), tf.float32)
    fn = tf.cast(tf.math.count_nonzero((1 - y_pred) * y, axis=0), tf.float32)

    # The epsilon avoids 0/0; a label with no positives and no predictions in
    # the batch therefore scores 0 and lowers the macro average.
    f1 = 2*tp / (2*tp + fn + fp + 1e-16)
    return tf.reduce_mean(f1)
  
def macro_soft_f1(y, y_hat):
    """Differentiable macro-F1 loss: mean over labels of (1 - soft-F1).

    The confusion counts are "soft": they are built from the predicted
    probabilities themselves rather than from thresholded 0/1 predictions.

    Args:
        y (int32 Tensor): targets array of shape (BATCH_SIZE, N_LABELS)
        y_hat (float32 Tensor): probability matrix of shape (BATCH_SIZE, N_LABELS)

    Returns:
        cost (scalar Tensor): value of the cost function for the batch
    """
    targets = tf.cast(y, tf.float32)
    probs = tf.cast(y_hat, tf.float32)

    # Soft per-label counts, summed over the batch axis.
    soft_tp = tf.reduce_sum(probs * targets, axis=0)
    soft_fp = tf.reduce_sum(probs * (1 - targets), axis=0)
    soft_fn = tf.reduce_sum((1 - probs) * targets, axis=0)

    numerator = 2*soft_tp
    denominator = 2*soft_tp + soft_fn + soft_fp + 1e-16  # epsilon guards against 0/0
    per_label_cost = 1 - numerator / denominator  # minimising this maximises soft-F1

    # Average over all labels.
    return tf.reduce_mean(per_label_cost)

In [6]:
# create new model

# Hyperparameters
HIDDEN_DIM = 128   # passed to my_model as its num_hidden argument
LR = 0.001         # Adam learning rate
WINDOW_LEN = 10    # frames per training window
# Shape handed to model.build; presumably
# (batch, time, rows, cols, channels) for ConvLSTM2D -- TODO confirm.
INPUT_SHAPE = (None, 1, WINDOW_LEN, 128, 1)

model = my_model(HIDDEN_DIM)

# Train on the soft-F1 loss and report the thresholded macro F1 as a metric.
model.compile(
  loss=macro_soft_f1,
  metrics=[macro_f1],
  optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
)

model.build(input_shape=INPUT_SHAPE)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d (ConvLSTM2D)    (None, 6, 124, 128)       1651712   
                                                                 
 flatten (Flatten)           (None, 95232)             0         
                                                                 
 dense (Dense)               (None, 256)               24379648  
                                                                 
 dense_1 (Dense)             (None, 64)                16448     
                                                                 
 output (Dense)              (None, 9)                 585       
                                                                 
Total params: 26,048,393
Trainable params: 26,048,393
Non-trainable params: 0
_________________________________________________________________


In [7]:
# mappings for our own training (9)
# Column index of each drum class in the label arrays.
(
    KICK,
    SNARE,
    HH_CLOSED,
    HH_OPEN,
    RIDE,
    TOM_1,
    TOM_2,
    TOM_3,
    CRASH,
) = range(9)

# Total number of drum classes listed above.
NUM_FEATS = 9

In [8]:

# Load the training features/labels from disk and regroup the frames into
# fixed-length windows of WINDOW_LEN frames, with one label vector per window.
DATA_ID = '_5s_20hz'
dataset_feats_path = "data/dataset" + DATA_ID + "/" + "train_feats" + DATA_ID + ".npy"
dataset_labels_path = "data/dataset" + DATA_ID + "/" + "train_labels" + DATA_ID + ".npy"

with tf.device('/CPU:0'):
    # load in data
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
    # which can execute code from an untrusted file. If these files are plain
    # numeric arrays, allow_pickle=False should be sufficient -- confirm.
    train_feats_npy = np.load(dataset_feats_path, allow_pickle=True)
    train_labels_npy = np.load(dataset_labels_path, allow_pickle=True)
    print ('feats.shape: ', train_feats_npy.shape)
    print ('labels.shape: ', train_labels_npy.shape)

    # reshape numpy arrays: merge the first two axes so every row is one frame.
    # Unpacking the shape also fails fast if an array is not 3-D.
    feat_clips, feat_frames, n_feat_dims = train_feats_npy.shape
    label_clips, label_frames, n_label_dims = train_labels_npy.shape
    train_feats_npy = train_feats_npy.reshape(feat_clips * feat_frames, n_feat_dims)
    train_labels_npy = train_labels_npy.reshape(label_clips * label_frames, n_label_dims)

    if train_feats_npy.shape[0] != train_labels_npy.shape[0]:
        raise ValueError(
            'feature/label frame count mismatch: '
            + str(train_feats_npy.shape[0]) + ' vs ' + str(train_labels_npy.shape[0])
        )

    # Number of complete windows, and the leftover frames that do not fill one.
    new_0 = train_feats_npy.shape[0] // WINDOW_LEN
    rem = train_feats_npy.shape[0] - (new_0 * WINDOW_LEN)

    print ('new_0: ', new_0, ' rem: ', rem)

    # Drop the trailing `rem` frames so the reshape below cannot fail when the
    # frame count is not a multiple of WINDOW_LEN (no-op when rem == 0).
    usable_frames = new_0 * WINDOW_LEN
    train_feats_npy = train_feats_npy[:usable_frames].reshape(new_0, 1, WINDOW_LEN, n_feat_dims, 1)
    train_labels_npy = train_labels_npy[:usable_frames].reshape(new_0, WINDOW_LEN, n_label_dims)

    # Fraction of frames in each window where each label is set (vectorised
    # replacement for the per-element Python loop). A window is marked positive
    # only when that fraction exceeds 0.5 -- this assumes frame labels are 0/1.
    reshaped_train_labels = train_labels_npy.sum(axis=1, dtype=np.float64) / WINDOW_LEN
    train_labels_npy = np.where(reshaped_train_labels > 0.5, 1, 0)

    print ('reshaped-dataset.shape: ', train_feats_npy.shape)
    print ('reshaped-dataset.shape: ', train_labels_npy.shape)

feats.shape:  (6522, 100, 128)
labels.shape:  (6522, 100, 9)
new_0:  65220  rem:  0
reshaped-dataset.shape:  (65220, 1, 10, 128, 1)
reshaped-dataset.shape:  (65220, 9)


In [9]:
# train the network!
BATCH_SIZE = 32  # samples per gradient step
EPOCHS = 5       # full passes over the training set

model.fit(
    x=train_feats_npy,
    y=train_labels_npy,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x132e3383940>

In [10]:
# save model
# NOTE(review): the model was compiled with the custom macro_soft_f1 / macro_f1
# functions; reloading it presumably needs custom_objects or compile=False --
# verify before relying on the saved copy.

MODEL_ID = 'test_model'
model.save(filepath='models/' + MODEL_ID)

INFO:tensorflow:Assets written to: models/test_model\assets
