In [70]:
# Preprocessing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# model libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, StringLookup, IntegerLookup, CategoryEncoding
from tensorflow.keras import Model, layers
from tensorflow.keras.optimizers import Adam

In [87]:
# put data into a dataframe
df = pd.read_csv("Data/features_30_sec.csv")

# remove unnessary columns
input_data = df.drop(labels=['filename', 'length'], axis=1)

# get all of the columns for specifying network features
columns = df.columns
features = columns[2:]
num_of_features = len(features[2:])

# one hot encoding for string labels
vocab = ['disco', 'metal', 'classical', 'reggae', 'blues', 'rock', 'hiphop', 'jazz', 'pop', 'country']
input_data['label'] = pd.get_dummies(input_data['label'], vocab)

# split data into training, testing, and validation
train, test = train_test_split(input_data[features], test_size=0.2)
train, val = train_test_split(train, test_size=0.2)

# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
	dataframe = dataframe.copy()
	labels = dataframe.pop('label')
	ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
	if shuffle:
		ds = ds.shuffle(buffer_size=len(dataframe))
	ds = ds.batch(batch_size)
	ds = ds.prefetch(batch_size)
	return ds

In [85]:

# normalize the data so mu is 0 and std dev is 1
def get_normalization_layer(name, dataset):
	# Create a Normalization layer for our feature.
	normalizer = tf.keras.layers.Normalization(axis=None)

	# Prepare a Dataset that only yields our feature.
	feature_ds = dataset.map(lambda x, y: x[name])

	# Learn the statistics of the data.
	normalizer.adapt(feature_ds)

	return normalizer

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([ 1.3380439 , -0.5863977 , -1.2642847 ,  0.25080043, -1.734972  ],
      dtype=float32)>

In [None]:
# convert the train, validation, and test set to a tensor
batch_size = 256
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

all_inputs = []
encoded_features = []

# for numerical features, dont include label in the last column
for header in features[:-1]:
	numeric_col = tf.keras.Input(shape=(1,), name=header)
	normalization_layer = get_normalization_layer(header, train_ds)
	encoded_numeric_col = normalization_layer(numeric_col)
	all_inputs.append(numeric_col)
	encoded_features.append(encoded_numeric_col)

# concat the tensors of each feature for network input 
all_features = tf.keras.layers.concatenate(encoded_features)

# neural network
x = tf.keras.layers.Dense(32, activation="relu")(all_features)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(all_inputs, output)
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])

tf.keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

In [98]:
# train for 10 epochs, scoring the validation split after each epoch
model.fit(x=train_ds, validation_data=val_ds, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x18c6c7ca0>

In [99]:
# score the held-out test split; evaluate() returns the loss followed by the compiled metric
loss, accuracy = model.evaluate(x=test_ds)
print("Accuracy:", accuracy)

Accuracy 0.9449999928474426
