# Will a Viral Song Enter the Top 200 Streamed Tracks?

In [1]:
import numpy as np
import pandas as pd

from sqlalchemy import create_engine
from sqlalchemy import types
import pymysql
pymysql.install_as_MySQLdb()
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

from connections import password

# Data Import

In [2]:
# Establish SQL connection
connection_string = (f"root:{password}@localhost/spot_db")
engine = create_engine(f"mysql://{connection_string}")# , pool_recycle=3600, pool_pre_ping=True)

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)
# Save reference to the table
_features = Base.classes.features
_top = Base.classes.top_200_daily
# Create connection object
session = Session(engine)

In [3]:
features = pd.read_sql(session.query(_features).statement, session.bind)

In [4]:
top = pd.read_sql(session.query(_top).statement, session.bind)

# Data Pre-Processing

In [5]:
features['label'] = features.ID.isin(top.ID)
features.head()

Unnamed: 0,index,Acousticness,Danceability,Duration_ms,Energy,ID,Instrumentalness,Key,Liveness,Loudness,Mode,Speechiness,Tempo,Time_Signature,Valence,label
0,805,0.131,0.748,188491,0.627,000xQL6tZNLJzIrtIgxqSl,0.0,7,0.0852,-6.029,1,0.0644,120.963,4,0.524,True
1,4111,0.0822,0.353,193680,0.755,003eoIwxETJujVWmNFMoZy,0.0,1,0.39,-6.276,0,0.733,191.153,4,0.437,True
2,3606,0.287,0.62,284856,0.625,00B7TZ0Xawar6NZ00JFomN,0.0,9,0.314,-7.438,1,0.553,167.911,4,0.665,True
3,4602,0.461,0.878,286517,0.407,00bWqt93aqLXqKtzZoq7Jw,0.0,7,0.111,-10.941,1,0.233,89.011,4,0.547,False
4,1671,0.283,0.587,224412,0.62,00c9VpjXk7iHdLEEzrAxbr,0.0,11,0.128,-6.217,1,0.0504,99.926,4,0.143,False


In [6]:
features.tail()

Unnamed: 0,index,Acousticness,Danceability,Duration_ms,Energy,ID,Instrumentalness,Key,Liveness,Loudness,Mode,Speechiness,Tempo,Time_Signature,Valence,label
5077,3030,0.0163,0.662,209893,0.97,7zuLFva2TbSg6mBijigBzm,0.00207,9,0.0979,-4.855,1,0.0871,126.038,4,0.675,False
5078,937,0.0202,0.621,204879,0.88,7zva8L4Db1fMCN30Y9cmJD,0.628,7,0.0828,-2.833,0,0.0438,139.966,4,0.58,False
5079,134,0.0795,0.717,196173,0.587,7zvKFw17XyoBUx9mHiwzPy,1.9e-05,7,0.192,-8.97,1,0.368,112.44,5,0.336,False
5080,544,0.105,0.748,179773,0.723,7zxRMhXxJMQCeDDg0rKAVo,0.0,0,0.0949,-6.213,1,0.347,154.966,4,0.35,True
5081,2424,0.0692,0.863,246009,0.609,7zyHb6wrRALMIyCZgwLw5u,0.0,10,0.141,-3.556,1,0.27,115.054,4,0.701,False


In [7]:
X = features.drop(['label', 'index', 'ID'], axis = 1)
y = features['label']

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state = 1, stratify = y)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Label-encode dataset
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Convert encoded labels to one-hot-encodeing


y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Create Deep Learning Model

In [92]:
from keras.models import Sequential
from keras.layers import Dense

# Create model and add layers
model = Sequential()
model.add(Dense(units=5, activation='relu', input_dim=13))
model.add(Dense(units=15, activation='relu'))
model.add(Dense(units=10, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [93]:
# Compile and fit the model
model.compile(optimizer = 'adam',
             loss = 'categorical_crossentropy',
             metrics = ['accuracy'])
model.fit(X_train_scaled, 
         y_train_categorical,
         epochs = 500,
         shuffle = True,
         verbose = 2)

Epoch 1/500
 - 1s - loss: 0.6551 - acc: 0.6137
Epoch 2/500
 - 0s - loss: 0.5910 - acc: 0.7085
Epoch 3/500
 - 0s - loss: 0.5873 - acc: 0.7085
Epoch 4/500
 - 0s - loss: 0.5842 - acc: 0.7085
Epoch 5/500
 - 0s - loss: 0.5818 - acc: 0.7085
Epoch 6/500
 - 0s - loss: 0.5800 - acc: 0.7085
Epoch 7/500
 - 0s - loss: 0.5793 - acc: 0.7085
Epoch 8/500
 - 0s - loss: 0.5775 - acc: 0.7085
Epoch 9/500
 - 0s - loss: 0.5758 - acc: 0.7085
Epoch 10/500
 - 0s - loss: 0.5744 - acc: 0.7085
Epoch 11/500
 - 0s - loss: 0.5737 - acc: 0.7082
Epoch 12/500
 - 0s - loss: 0.5724 - acc: 0.7085
Epoch 13/500
 - 0s - loss: 0.5719 - acc: 0.7085
Epoch 14/500
 - 0s - loss: 0.5703 - acc: 0.7085
Epoch 15/500
 - 0s - loss: 0.5691 - acc: 0.7082
Epoch 16/500
 - 0s - loss: 0.5683 - acc: 0.7085
Epoch 17/500
 - 0s - loss: 0.5681 - acc: 0.7085
Epoch 18/500
 - 0s - loss: 0.5667 - acc: 0.7085
Epoch 19/500
 - 0s - loss: 0.5667 - acc: 0.7087
Epoch 20/500
 - 0s - loss: 0.5653 - acc: 0.7085
Epoch 21/500
 - 0s - loss: 0.5650 - acc: 0.7085
E

<keras.callbacks.History at 0x268d5243ac8>

# Performance

In [94]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 0.6110756081169182, Accuracy: 0.7057435094700565


# Prediction Test

In [13]:
encoded_predictions = model.predict_classes(X_test_scaled[100:105])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[100:105])}")

Predicted classes: [False False False False False]
Actual Labels: [False, False, True, False, False]


  if diff:
