In [10]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers

In [11]:
dataframe = pd.read_csv("fixed_cleaned_spotify_data.csv")


In [12]:
dataframe.head()

Unnamed: 0,track,artist,uri,danceability,energy,tempo
0,Lucky Man,Montgomery Gentry,spotify:track:4GiXBCUF7H6YfNQsnBRIzl,0.578,0.471,133.061
1,On The Hotline,Pretty Ricky,spotify:track:1zyqZONW985Cs4osz9wlsu,0.704,0.854,92.988
2,Clouds Of Dementia,Candlemass,spotify:track:6cHZf7RbxXCKwEkgAZT4mY,0.162,0.836,86.964
3,"Heavy Metal, Raise Hell!",Zwartketterij,spotify:track:2IjBPp2vMeX7LggzRN3iSX,0.188,0.994,148.44
4,I Got A Feelin,Billy Currington,spotify:track:1tF370eYXUcWwkIvaq3IGz,0.63,0.764,112.098


In [27]:
dataframe['energy'] = dataframe['energy'].astype(int)
# dataframe = dataframe.astype({"track":'str',"artist":'str',"uri":'str',"danceability":'int',"energy":'int',"tempo":'int'})

In [28]:
dataframe['energy'].dtypes()

TypeError: 'numpy.dtype' object is not callable

In [None]:
val_dataframe = dataframe.sample(frac=0.2, random_state=1337)
train_dataframe = dataframe.drop(val_dataframe.index)

print(
    "Using %d samples for training and %d for validation"
    % (len(train_dataframe), len(val_dataframe))
)

In [None]:
def dataframe_to_dataset(dataframe):
    dataframe = dataframe.copy()
    labels = dataframe.pop("tempo")
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    ds = ds.shuffle(buffer_size=len(dataframe))
    return ds


train_ds = dataframe_to_dataset(train_dataframe)
val_ds = dataframe_to_dataset(val_dataframe)

In [None]:
for x, y in train_ds.take(1):
    print("Input:", x)
    print("Target:", y)

In [None]:
train_ds = train_ds.batch(32)
val_ds = val_ds.batch(32)

In [None]:
from tensorflow.keras.layers import IntegerLookup
from tensorflow.keras.layers import Normalization
from tensorflow.keras.layers import StringLookup

In [None]:
def encode_numerical_feature(feature, name, dataset):
    # Create a Normalization layer for our feature
    normalizer = Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature

In [None]:
def encode_categorical_feature(feature, name, dataset, is_string):
    lookup_class = StringLookup if is_string else IntegerLookup
    # Create a lookup layer which will turn strings into integer indices
    lookup = lookup_class(output_mode="binary")

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible string values and assign them a fixed integer index
    lookup.adapt(feature_ds)

    # Turn the string input into integer indices
    encoded_feature = lookup(feature)
    return encoded_feature


In [None]:
track = keras.Input(shape=(1,), name="track", dtype="object")
artist = keras.Input(shape=(1,), name="artist", dtype="object")
uri = keras.Input(shape=(1,), name="uri", dtype="object")


# Numerical features
danceability = keras.Input(shape=(1,), name="danceability")
energy = keras.Input(shape=(1,), name="energy")
tempo = keras.Input(shape=(1,), name="tempo")


all_inputs = [
    track,
    artist,
    uri,
    danceability,
    energy,
    tempo,
]

# Integer categorical features
track_encoded = encode_categorical_feature(track, "track", train_ds, True)
artist_encoded = encode_categorical_feature(artist, "artist", train_ds, True)
uri_encoded = encode_categorical_feature(uri, "uri", train_ds, True)


# Numerical features
danceability_encoded = encode_numerical_feature(danceability, "danceability", train_ds)
energy_encoded = encode_numerical_feature(energy, "energy", train_ds)
tempo_encoded = encode_numerical_feature(tempo, "tempo", train_ds)

all_features = layers.concatenate(
    [
        track_encoded,
        artist_encoded,
        uri_encoded,
        danceability_encoded,
        energy_encoded,
        tempo_encoded,
    ]
)
x = layers.Dense(32, activation="relu")(all_features)
x = layers.Dropout(0.5)(x)
output = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(all_inputs, output)
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])