# Will a Viral Song Enter the Top 200 Streamed Tracks?

In [1]:
import numpy as np
import pandas as pd

from sqlalchemy import create_engine
from sqlalchemy import types
import pymysql
pymysql.install_as_MySQLdb()
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

from connections import password

# Data Import

In [2]:
# Establish SQL connection
from urllib.parse import quote  # stdlib; imported here because only this cell needs it

# Percent-encode the password before embedding it in the URL: characters such
# as '/', '%', '+' or ':' would otherwise be parsed as URL syntax and the
# connection would fail (or use a mangled password).
# NOTE(review): assumes `connections.password` holds the raw, un-encoded
# password — confirm it has not already been percent-encoded by hand.
connection_string = f"root:{quote(password, safe='')}@localhost/spot_db"
engine = create_engine(f"mysql://{connection_string}")

# Reflect the existing database schema into automapped ORM classes.
# NOTE(review): `reflect=True` is deprecated in SQLAlchemy 1.4+ in favour of
# `autoload_with=engine`; kept as-is because the installed version is unknown.
Base = automap_base()
Base.prepare(engine, reflect=True)
# Keep references to the two mapped tables used by the cells below
_features = Base.classes.features
_top = Base.classes.top_200_daily
# Session whose bind/queries the later read_sql calls rely on
session = Session(engine)

In [3]:
# Load the reflected `features` table into a DataFrame: the ORM query is only
# used to build the SELECT statement, which pandas then executes on the bind.
features = pd.read_sql(
    sql=session.query(_features).statement,
    con=session.bind,
)

In [4]:
# Load the reflected `top_200_daily` table into a DataFrame, using the same
# statement-plus-bind pattern as the `features` load.
top = pd.read_sql(
    sql=session.query(_top).statement,
    con=session.bind,
)

# Data Pre-Processing

In [5]:
# Target column: True when the track's ID also occurs in the top-200 table.
features['label'] = features['ID'].isin(top['ID'])
# Preview the first five rows to check the new column.
features.head(n=5)

Unnamed: 0,index,Acousticness,Danceability,Duration_ms,Energy,ID,Instrumentalness,Key,Liveness,Loudness,Mode,Speechiness,Tempo,Time_Signature,Valence,label
0,1560,0.131,0.748,188491,0.627,000xQL6tZNLJzIrtIgxqSl,0.0,7,0.0852,-6.029,1,0.0644,120.963,4,0.524,True
1,6494,0.0822,0.353,193680,0.755,003eoIwxETJujVWmNFMoZy,0.0,1,0.39,-6.276,0,0.733,191.153,4,0.437,True
2,1345,0.00239,0.588,262347,0.885,004S8bMhFQjnbuqvdh6W71,0.00246,9,0.0862,-6.267,0,0.0654,149.031,4,0.928,False
3,7841,0.00304,0.619,218747,0.762,007d7JT41sSc1HqWTs4uw7,0.0,0,0.122,-6.738,0,0.0692,122.916,4,0.324,True
4,5989,0.287,0.62,284856,0.625,00B7TZ0Xawar6NZ00JFomN,0.0,9,0.314,-7.438,1,0.553,167.911,4,0.665,True


In [6]:
features.tail()

Unnamed: 0,index,Acousticness,Danceability,Duration_ms,Energy,ID,Instrumentalness,Key,Liveness,Loudness,Mode,Speechiness,Tempo,Time_Signature,Valence,label
11715,8088,0.00898,0.612,188253,0.799,7zVCrzzEJU7u24sbJPXA5W,0.0,10,0.172,-4.603,1,0.0275,125.976,4,0.687,True
11716,268,0.0795,0.717,196173,0.587,7zvKFw17XyoBUx9mHiwzPy,1.9e-05,7,0.192,-8.97,1,0.368,112.44,5,0.336,False
11717,972,0.635,0.415,276875,0.415,7zxrA6P9OWCc1nXF6GGLIp,0.00521,10,0.0874,-10.784,0,0.0501,139.822,5,0.179,False
11718,1072,0.105,0.748,179773,0.723,7zxRMhXxJMQCeDDg0rKAVo,0.0,0,0.0949,-6.213,1,0.347,154.966,4,0.35,True
11719,4659,0.0692,0.863,246009,0.609,7zyHb6wrRALMIyCZgwLw5u,0.0,10,0.141,-3.556,1,0.27,115.054,4,0.701,False


In [7]:
# Feature matrix: every column except the target and the two identifier
# columns ('index' and 'ID').
X = features.drop(labels=['label', 'index', 'ID'], axis='columns')
# Target vector: the boolean top-200 flag.
y = features['label']

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [9]:
# Split into train/test sets. No test_size is given, so scikit-learn's default
# proportion applies; stratifying on y keeps the label ratio equal in both
# splits, and random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

# Fit the scaler on the training split only, then apply the same scaling to
# both splits so no test-set statistics leak into training.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Label-encode the boolean target into integer class indices
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Convert encoded labels to one-hot encoding.
# The class count is passed explicitly: left to itself, to_categorical infers
# it from the largest value in each array, so a split missing the highest
# class would get a narrower matrix than the other split.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)

# Create Deep Learning Model

In [10]:
from keras.models import Sequential
from keras.layers import Dense

### Viral and Top Model

In [11]:
# # Create model and add layers
# model = Sequential()
# model.add(Dense(units=5, activation='relu', input_dim=13))
# # model.add(Dense(units=4, activation='relu'))
# model.add(Dense(units=2, activation='softmax'))

### Genre and Top Model

In [16]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=13))
model.add(Dense(units=30, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [19]:
# Compile and fit the model
model.compile(optimizer = 'adam',
             loss = 'categorical_crossentropy',
             metrics = ['accuracy'])
model.fit(X_train_scaled, 
         y_train_categorical,
         epochs = 500,
         shuffle = True,
         verbose = 2)

Epoch 1/500
 - 0s - loss: 0.3648 - acc: 0.8324
Epoch 2/500
 - 0s - loss: 0.3600 - acc: 0.8333
Epoch 3/500
 - 0s - loss: 0.3577 - acc: 0.8402
Epoch 4/500
 - 0s - loss: 0.3575 - acc: 0.8400
Epoch 5/500
 - 0s - loss: 0.3575 - acc: 0.8370
Epoch 6/500
 - 0s - loss: 0.3529 - acc: 0.8381
Epoch 7/500
 - 0s - loss: 0.3547 - acc: 0.8380
Epoch 8/500
 - 0s - loss: 0.3547 - acc: 0.8411
Epoch 9/500
 - 0s - loss: 0.3519 - acc: 0.8395
Epoch 10/500
 - 0s - loss: 0.3521 - acc: 0.8374
Epoch 11/500
 - 0s - loss: 0.3487 - acc: 0.8448
Epoch 12/500
 - 0s - loss: 0.3490 - acc: 0.8432
Epoch 13/500
 - 0s - loss: 0.3463 - acc: 0.8447
Epoch 14/500
 - 0s - loss: 0.3454 - acc: 0.8408
Epoch 15/500
 - 0s - loss: 0.3442 - acc: 0.8425
Epoch 16/500
 - 0s - loss: 0.3427 - acc: 0.8462
Epoch 17/500
 - 0s - loss: 0.3412 - acc: 0.8421
Epoch 18/500
 - 0s - loss: 0.3404 - acc: 0.8481
Epoch 19/500
 - 0s - loss: 0.3411 - acc: 0.8477
Epoch 20/500
 - 0s - loss: 0.3400 - acc: 0.8454
Epoch 21/500
 - 0s - loss: 0.3352 - acc: 0.8495
E

<keras.callbacks.History at 0x1e53fa20400>

# Performance

In [20]:
# Score the trained network on the held-out test split; evaluate() yields the
# loss followed by the accuracy metric configured at compile time.
model_loss, model_accuracy = model.evaluate(
    x=X_test_scaled,
    y=y_test_categorical,
    verbose=2,
)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 2.0591251208920527, Accuracy: 0.6604095562326217


# Prediction Test

In [15]:
# Spot-check ten test rows (positions 400-409): predict, decode, and compare
# with the true labels.
# `Sequential.predict_classes` is deprecated and absent from newer Keras /
# TensorFlow releases; for a softmax output it is equivalent to taking the
# argmax over the predicted class probabilities, which works in every version.
class_probabilities = model.predict(X_test_scaled[400:410])
encoded_predictions = np.argmax(class_probabilities, axis=-1)
prediction_labels = label_encoder.inverse_transform(encoded_predictions)
print(f"Predicted classes: {prediction_labels}")
# y_test keeps the shuffled integer index from the original frame, so use
# .iloc to select the same *positions* as the array slice above.
print(f"Actual Labels: {list(y_test.iloc[400:410])}")

Predicted classes: [False False False  True False  True  True False False False]
Actual Labels: [True, False, True, True, False, False, True, False, False, False]


  if diff:
