In [13]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pdb
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
from tensorflow.keras.backend import argmax, cast
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, LSTM, GRU, Input, Embedding, RepeatVector, Reshape, Conv1D, MaxPooling1D, UpSampling1D, Bidirectional, Lambda, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import to_categorical
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

In [3]:
# Load the minor-key and major-key bassline tables.
# header=None makes pandas treat the first line of each file as data, not column names.
minor_df = pd.read_csv("Datasets/bassline_representations_min.csv", header=None)
major_df = pd.read_csv("Datasets/bassline_representations_maj.csv", header=None)

In [4]:
# Sanity check: (rows, columns) of the minor and major tables.
minor_df.shape, major_df.shape

((208, 64), (69, 64))

In [5]:
# Raw value matrices of each table; rows are treated as individual samples downstream.
minor_data, major_data = minor_df.values, major_df.values

# Both tables stacked row-wise, so a single vocabulary can cover every symbol.
all_data = pd.concat([minor_df, major_df])

# Sequence length is the number of columns in the tables.
timesteps = major_df.shape[1]

In [6]:
# Sorted distinct symbols found anywhere in the combined data, and the
# contiguous integer ids 0..len(notes)-1 assigned to them in that order.
notes = np.unique(all_data)
vocab = np.arange(len(notes))

# Forward (note -> id) table, and its exact inverse (id -> note).
n2v_mapping = dict(zip(notes, vocab))
v2n_mapping = {idx: note for note, idx in n2v_mapping.items()}

# Number of distinct symbols; used as the Embedding input dimension.
vocab_size = len(n2v_mapping)

def replace_with_dict(ar, dic):
    """Translate every element of ``ar`` through ``dic`` in a vectorised way.

    Parameters
    ----------
    ar : array_like
        Values to translate. Every element must be a key of ``dic``.
    dic : dict
        Lookup table from original values to replacement values.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``ar`` holding ``dic[x]`` for each ``x``.

    Raises
    ------
    KeyError
        If ``ar`` contains a value that is not a key of ``dic``. Without this
        check ``searchsorted`` would silently substitute the value of a
        neighbouring key, or index out of bounds above the largest key.
    """
    ar = np.asarray(ar)

    # Extract keys and values as parallel arrays.
    k = np.array(list(dic.keys()))
    v = np.array(list(dic.values()))

    if k.size == 0:
        # Nothing can be looked up in an empty table.
        if ar.size:
            raise KeyError("replace_with_dict: mapping is empty but input is not")
        return v.reshape(ar.shape)

    # Indirect sort, so the dict keys do not have to be ordered.
    sidx = k.argsort()

    # Insertion slot of each element among the sorted keys. A value above
    # every key yields len(k), so clamp it to keep the lookup in bounds.
    pos = np.minimum(np.searchsorted(k, ar, sorter=sidx), k.size - 1)
    hit = sidx[pos]

    # searchsorted returns a slot even for absent values; confirm the key
    # found there really equals the element being translated.
    found = np.asarray(k[hit] == ar)
    if k.dtype.kind == "f" and ar.dtype.kind == "f":
        # NaN never compares equal to itself, but a NaN key is a valid match.
        found = found | (np.isnan(k[hit]) & np.isnan(ar))
    if not np.all(found):
        missing = np.unique(ar[~found])
        raise KeyError(f"values not present in mapping: {missing[:10].tolist()}")

    return v[hit]

# Encode notes as vocabulary ids. Read from the untouched DataFrames rather
# than from minor_data/major_data, so re-running this cell does not push
# already-encoded ids through the note mapping a second time.
minor_data = replace_with_dict(minor_df.values, n2v_mapping).astype(int)
major_data = replace_with_dict(major_df.values, n2v_mapping).astype(int)

In [7]:
# Hold out 10% of the real (minor) basslines for testing, then 1/9 of the
# remainder (another ~10% of the total) for validation.
# A single array is passed, so no dummy target is needed and IPython's `_`
# (last output) is not overwritten; the split is the same for a fixed
# random_state because it depends only on the number of rows.
real_bassline_train, real_bassline_test = train_test_split(
    minor_data, test_size=0.1, random_state=42)
real_bassline_train, real_bassline_val = train_test_split(
    real_bassline_train, test_size=1/9, random_state=42)

In [8]:
# Row counts of the three real-data splits.
num_real_train, num_real_val, num_real_test = (
    len(split)
    for split in (real_bassline_train, real_bassline_val, real_bassline_test)
)

# Random Basslines (Negative Class)

In [9]:
# Negative examples: uniformly random ids over the whole vocabulary, one random
# sequence per real sequence in each split.
# A seeded Generator makes these reproducible across kernel restarts, matching
# the fixed random_state used for the real-data split.
rng = np.random.default_rng(42)
low, high = np.min(vocab), np.max(vocab) + 1  # `high` is exclusive

random_bassline_train = rng.integers(low, high, size=(num_real_train, timesteps))
random_bassline_val = rng.integers(low, high, size=(num_real_val, timesteps))
random_bassline_test = rng.integers(low, high, size=(num_real_test, timesteps))

In [10]:
# Row counts of the three random-data splits.
num_random_train, num_random_val, num_random_test = (
    len(split)
    for split in (random_bassline_train, random_bassline_val, random_bassline_test)
)

# Combined Dataset (Random + Real)

In [11]:
# Features: random sequences stacked on top of real ones, per split.
# np.vstack replaces np.row_stack, a deprecated alias of the same function.
X_train = np.vstack((random_bassline_train, real_bassline_train))
X_val = np.vstack((random_bassline_val, real_bassline_val))
X_test = np.vstack((random_bassline_test, real_bassline_test))

# Labels in the same order as the rows above: 0 = random, 1 = real.
y_train = np.concatenate((np.zeros(num_random_train), np.ones(num_real_train))).astype(np.int8)
y_val = np.concatenate((np.zeros(num_random_val), np.ones(num_real_val))).astype(np.int8)
y_test = np.concatenate((np.zeros(num_random_test), np.ones(num_real_test))).astype(np.int8)

# Generated Data

In [12]:
# Read the generator output and encode it with the same note -> id table as the
# training data.
# NOTE(review): unlike the dataset CSVs, this read does not pass header=None, so
# the first line of the file is consumed as column names. Confirm that
# FirstOut.csv really has a header row.
generated = replace_with_dict(
    pd.read_csv('../generator/FirstOut.csv').values,
    n2v_mapping,
)

## Model 1: hidden_dims x Dense

In [31]:
# Identifier used in this model's checkpoint path and saved loss-figure path.
name = 'NBD_dense'

In [None]:
# Hyper-parameters of the dense discriminator.
timesteps = 64  # input sequence length
embed_size = 32
hidden_dims = (64, 1)  # hidden widths, then the output width
dropout = 0  # not used by this architecture

# Token ids -> embeddings -> one flat vector per sequence.
inputs = Input(shape=(timesteps,))
embedded = Embedding(vocab_size, embed_size)(inputs)
flattened = Flatten()(embedded)

# Every entry except the last is a ReLU hidden layer.
x = flattened
for hidden_dim in hidden_dims[:-1]:
    x = Dense(hidden_dim, activation='relu')(x)

# The last entry is the sigmoid output layer.
res = Dense(hidden_dims[-1], activation='sigmoid')(x)

discriminator = Model(inputs, res)

In [None]:
# Show the layer stack of the model built above.
discriminator.summary()

In [None]:
lr = 5e-3

# ModelCheckpoint only consults `monitor` when save_best_only=True; without it
# the file is overwritten after every epoch and ends up holding the last epoch.
# Create the target directory first so the save cannot fail on a fresh checkout.
os.makedirs('Models', exist_ok=True)
mc = ModelCheckpoint(f'Models/{name}.hdf5', monitor='val_loss', save_best_only=True)

optimizer = Adam(learning_rate=lr)

# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
discriminator.compile(optimizer, loss='binary_crossentropy')


In [None]:
# Train on the combined set; the validation split is evaluated every epoch and
# the checkpoint callback `mc` writes the model file.
hist = discriminator.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
    shuffle=True,
    callbacks=[mc],
)

In [None]:
# Training vs. validation loss per epoch, saved under the model's name.
fig, ax = plt.subplots(figsize=(20, 6))

ax.plot(hist.history['loss'], label='training')
ax.plot(hist.history['val_loss'], label='validation')
ax.legend()

fig.savefig(f'figures/{name}', dpi=400)

In [32]:
# Replace the in-memory model with the checkpoint file saved under this model's name.
discriminator = load_model(f'Models/{name}.hdf5')

In [33]:
# Threshold the sigmoid outputs at 0.5 and score against the training labels.
train_preds = np.greater_equal(discriminator.predict(X_train, batch_size=32), 0.5)
accuracy_score(y_train, train_preds)

1.0

In [34]:
# Threshold the sigmoid outputs at 0.5 and score against the validation labels.
val_preds = np.greater_equal(discriminator.predict(X_val, batch_size=32), 0.5)
accuracy_score(y_val, val_preds)

1.0

In [35]:
# Threshold the sigmoid outputs at 0.5 and score against the test labels.
test_preds = np.greater_equal(discriminator.predict(X_test, batch_size=32), 0.5)
accuracy_score(y_test, test_preds)

1.0

In [36]:
# True where the discriminator scores a generated sequence at 0.5 or above,
# i.e. classifies it as real (label 1).
generated_preds = np.greater_equal(discriminator.predict(generated, batch_size=32), 0.5)
generated_preds

array([[ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True]])

## Model 2: hidden_dims x LSTM

In [37]:
# Identifier used in this model's checkpoint path and saved loss-figure path.
name = 'NBD_lstm_only'

In [None]:
# Hyper-parameters of the all-LSTM discriminator.
timesteps = 64  # input sequence length
embed_size = 32
hidden_dims = (16, 8, 1)  # LSTM widths; the last one is the output layer
dropout = 0

inputs = Input(shape=(timesteps,))
embedded = Embedding(vocab_size, embed_size)(inputs)

# All but the last LSTM return the full sequence so the next one can consume it.
x = embedded
for hidden_dim in hidden_dims[:-1]:
    x = LSTM(hidden_dim, return_sequences=True, dropout=dropout)(x)

# The final LSTM returns only its last state and uses a sigmoid activation.
# NOTE(review): the output is an LSTM unit, not a Dense sigmoid head.
# Confirm this is intended.
res = LSTM(hidden_dims[-1], activation='sigmoid')(x)

discriminator = Model(inputs, res)

In [None]:
# Show the layer stack of the model built above.
discriminator.summary()

In [None]:
lr = 5e-3

# ModelCheckpoint only consults `monitor` when save_best_only=True; without it
# the file is overwritten after every epoch and ends up holding the last epoch.
# Create the target directory first so the save cannot fail on a fresh checkout.
os.makedirs('Models', exist_ok=True)
mc = ModelCheckpoint(f'Models/{name}.hdf5', monitor='val_loss', save_best_only=True)

optimizer = Adam(learning_rate=lr)

# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
discriminator.compile(optimizer, loss='binary_crossentropy')


In [None]:
# Train on the combined set; the validation split is evaluated every epoch and
# the checkpoint callback `mc` writes the model file.
hist = discriminator.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    shuffle=True,
    callbacks=[mc],
)

In [None]:
# Training vs. validation loss per epoch, saved under the model's name.
fig, ax = plt.subplots(figsize=(20, 6))

ax.plot(hist.history['loss'], label='training')
ax.plot(hist.history['val_loss'], label='validation')
ax.legend()

fig.savefig(f'figures/{name}', dpi=400)

In [38]:
# Replace the in-memory model with the checkpoint file saved under this model's name.
discriminator = load_model(f'Models/{name}.hdf5')



In [39]:
# Threshold the sigmoid outputs at 0.5 and score against the training labels.
train_preds = np.greater_equal(discriminator.predict(X_train, batch_size=32), 0.5)
accuracy_score(y_train, train_preds)

1.0

In [40]:
# Threshold the sigmoid outputs at 0.5 and score against the validation labels.
val_preds = np.greater_equal(discriminator.predict(X_val, batch_size=32), 0.5)
accuracy_score(y_val, val_preds)

1.0

In [41]:
# Threshold the sigmoid outputs at 0.5 and score against the test labels.
test_preds = np.greater_equal(discriminator.predict(X_test, batch_size=32), 0.5)
accuracy_score(y_test, test_preds)

1.0

In [42]:
# True where the discriminator scores a generated sequence at 0.5 or above,
# i.e. classifies it as real (label 1).
generated_preds = np.greater_equal(discriminator.predict(generated, batch_size=32), 0.5)
generated_preds

array([[ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True]])

## Model 3: (hidden_dims - 1) x LSTM + Dense

In [43]:
# Identifier used in this model's checkpoint path and saved loss-figure path.
name = 'NBD_lstm_dense'

In [None]:
# Hyper-parameters of the LSTM + Dense discriminator.
timesteps = 64  # input sequence length
embed_size = 32
hidden_dims = (16, 8, 1)  # LSTM widths, then the Dense output width
dropout = 0

inputs = Input(shape=(timesteps,))
embedded = Embedding(vocab_size, embed_size)(inputs)

# Every entry except the last is an LSTM that emits its full output sequence.
x = embedded
for hidden_dim in hidden_dims[:-1]:
    x = LSTM(hidden_dim, return_sequences=True, dropout=dropout)(x)

# Flatten the per-timestep outputs and classify with one sigmoid Dense layer.
flattened = Flatten()(x)
res = Dense(hidden_dims[-1], activation='sigmoid')(flattened)

discriminator = Model(inputs, res)

In [None]:
# Show the layer stack of the model built above.
discriminator.summary()

In [None]:
lr = 5e-3

# ModelCheckpoint only consults `monitor` when save_best_only=True; without it
# the file is overwritten after every epoch and ends up holding the last epoch.
# Create the target directory first so the save cannot fail on a fresh checkout.
os.makedirs('Models', exist_ok=True)
mc = ModelCheckpoint(f'Models/{name}.hdf5', monitor='val_loss', save_best_only=True)

optimizer = Adam(learning_rate=lr)

# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
discriminator.compile(optimizer, loss='binary_crossentropy')


In [None]:
# Train on the combined set; the validation split is evaluated every epoch and
# the checkpoint callback `mc` writes the model file.
hist = discriminator.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    shuffle=True,
    callbacks=[mc],
)

In [None]:
# Training vs. validation loss per epoch, saved under the model's name.
fig, ax = plt.subplots(figsize=(20, 6))

ax.plot(hist.history['loss'], label='training')
ax.plot(hist.history['val_loss'], label='validation')
ax.legend()

fig.savefig(f'figures/{name}', dpi=400)

In [44]:
# Replace the in-memory model with the checkpoint file saved under this model's name.
discriminator = load_model(f'Models/{name}.hdf5')

In [45]:
# Threshold the sigmoid outputs at 0.5 and score against the training labels.
train_preds = np.greater_equal(discriminator.predict(X_train, batch_size=32), 0.5)
accuracy_score(y_train, train_preds)

1.0

In [46]:
# Threshold the sigmoid outputs at 0.5 and score against the validation labels.
val_preds = np.greater_equal(discriminator.predict(X_val, batch_size=32), 0.5)
accuracy_score(y_val, val_preds)

1.0

In [47]:
# Threshold the sigmoid outputs at 0.5 and score against the test labels.
test_preds = np.greater_equal(discriminator.predict(X_test, batch_size=32), 0.5)
accuracy_score(y_test, test_preds)

1.0

In [48]:
# True where the discriminator scores a generated sequence at 0.5 or above,
# i.e. classifies it as real (label 1).
generated_preds = np.greater_equal(discriminator.predict(generated, batch_size=32), 0.5)
generated_preds

array([[ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True]])

## Model 4: (filter_sizes - 1) x CONV + Dense

In [49]:
# Identifier used in this model's checkpoint path and saved loss-figure path.
name = 'NBD_conv_dense'

In [None]:
# Hyper-parameters of the Conv1D + Dense discriminator.
timesteps = 64  # input sequence length
embed_size = 32
filter_sizes = (16, 8, 1)  # Conv1D filter counts, then the Dense output width
kernel_sizes = (3, 3)  # one kernel width per Conv1D layer
dropout = 0  # not used by this architecture

inputs = Input(shape=(timesteps,))
embedded = Embedding(vocab_size, embed_size)(inputs)

# One 'same'-padded convolution per (filters, kernel) pair, so the sequence
# length is preserved.
# NOTE(review): no activation is passed to Conv1D, so the Keras default applies.
x = embedded
for filter_size, kernel_size in zip(filter_sizes[:-1], kernel_sizes):
    x = Conv1D(filter_size, kernel_size, padding='same')(x)

# Flatten the feature maps and classify with one sigmoid Dense layer.
flattened = Flatten()(x)
res = Dense(filter_sizes[-1], activation='sigmoid')(flattened)

discriminator = Model(inputs, res)

In [None]:
# Show the layer stack of the model built above.
discriminator.summary()

In [None]:
lr = 5e-3

# ModelCheckpoint only consults `monitor` when save_best_only=True; without it
# the file is overwritten after every epoch and ends up holding the last epoch.
# Create the target directory first so the save cannot fail on a fresh checkout.
os.makedirs('Models', exist_ok=True)
mc = ModelCheckpoint(f'Models/{name}.hdf5', monitor='val_loss', save_best_only=True)

optimizer = Adam(learning_rate=lr)

# Binary cross-entropy matches the single sigmoid output and the 0/1 labels.
discriminator.compile(optimizer, loss='binary_crossentropy')


In [None]:
# Train on the combined set; the validation split is evaluated every epoch and
# the checkpoint callback `mc` writes the model file.
hist = discriminator.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    shuffle=True,
    callbacks=[mc],
)

In [None]:
# Training vs. validation loss per epoch, saved under the model's name.
fig, ax = plt.subplots(figsize=(20, 6))

ax.plot(hist.history['loss'], label='training')
ax.plot(hist.history['val_loss'], label='validation')
ax.legend()

fig.savefig(f'figures/{name}', dpi=400)

In [50]:
# Replace the in-memory model with the checkpoint file saved under this model's name.
discriminator = load_model(f'Models/{name}.hdf5')

In [51]:
# Threshold the sigmoid outputs at 0.5 and score against the training labels.
train_preds = np.greater_equal(discriminator.predict(X_train, batch_size=32), 0.5)
accuracy_score(y_train, train_preds)

1.0

In [52]:
# Threshold the sigmoid outputs at 0.5 and score against the validation labels.
val_preds = np.greater_equal(discriminator.predict(X_val, batch_size=32), 0.5)
accuracy_score(y_val, val_preds)

1.0

In [53]:
# Threshold the sigmoid outputs at 0.5 and score against the test labels.
test_preds = np.greater_equal(discriminator.predict(X_test, batch_size=32), 0.5)
accuracy_score(y_test, test_preds)

1.0

In [54]:
# True where the discriminator scores a generated sequence at 0.5 or above,
# i.e. classifies it as real (label 1).
generated_preds = np.greater_equal(discriminator.predict(generated, batch_size=32), 0.5)
generated_preds

array([[ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True]])