# Deep Learning Neural Network

## Import packages

In [1]:
import numpy as np
import pandas as pd
from numpy.random import seed
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import tensorflow
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

seed(6162)

## Read the data

In [2]:
#Load the dataset
song_data = pd.read_csv('https://raw.githubusercontent.com/yashapatel131/KDD_GroupProject/main/Datasets/Song_Attributes.csv')
song_data.head()

Unnamed: 0,ID,Acousticness,Album,Artist,Danceability,Duration,Energy,Explicit,Instrumentalness,Liveness,Loudness,Mode,Name,Popularity,popularityLabel,Speechiness,Tempo,TimeSignature,Valence
0,0,0.000728,Collective Soul (Deluxe Version),Collective Soul,0.52,234947,0.904,False,0.0103,0.0634,-5.03,1,Welcome All Again,35,Popular,0.0309,106.022,4,0.365
1,1,0.0182,Collective Soul (Deluxe Version),Collective Soul,0.581,239573,0.709,False,0.000664,0.174,-4.909,1,Fuzzy,31,Popular,0.0282,120.027,4,0.408
2,2,0.000473,Collective Soul (Deluxe Version),Collective Soul,0.572,198400,0.918,False,0.000431,0.0977,-3.324,0,Dig,30,Popular,0.0559,144.061,4,0.37
3,3,0.00097,Collective Soul (Deluxe Version),Collective Soul,0.596,231453,0.661,False,3.3e-05,0.113,-5.051,1,You,35,Popular,0.0254,111.975,4,0.183
4,4,3.6e-05,Collective Soul (Deluxe Version),Collective Soul,0.52,222520,0.808,False,1e-05,0.08,-4.553,0,My Days,21,Popular,0.0318,92.721,4,0.666


In [3]:
#Drop some columns
songAttributes = song_data.drop(columns=['ID', 'Album', 'Mode', 'Name'])
songAttributes.head()

Unnamed: 0,Acousticness,Artist,Danceability,Duration,Energy,Explicit,Instrumentalness,Liveness,Loudness,Popularity,popularityLabel,Speechiness,Tempo,TimeSignature,Valence
0,0.000728,Collective Soul,0.52,234947,0.904,False,0.0103,0.0634,-5.03,35,Popular,0.0309,106.022,4,0.365
1,0.0182,Collective Soul,0.581,239573,0.709,False,0.000664,0.174,-4.909,31,Popular,0.0282,120.027,4,0.408
2,0.000473,Collective Soul,0.572,198400,0.918,False,0.000431,0.0977,-3.324,30,Popular,0.0559,144.061,4,0.37
3,0.00097,Collective Soul,0.596,231453,0.661,False,3.3e-05,0.113,-5.051,35,Popular,0.0254,111.975,4,0.183
4,3.6e-05,Collective Soul,0.52,222520,0.808,False,1e-05,0.08,-4.553,21,Popular,0.0318,92.721,4,0.666


In [4]:
#Dummy encoding and view the new columns
songAttributes_binary_encoded = pd.get_dummies(songAttributes, columns=['Artist', 'TimeSignature', 'Explicit'])
songAttributes_binary_encoded.columns

Index(['Acousticness', 'Danceability', 'Duration', 'Energy',
       'Instrumentalness', 'Liveness', 'Loudness', 'Popularity',
       'popularityLabel', 'Speechiness',
       ...
       'Artist_t.A.T.u.', 'Artist_twenty one pilots', 'Artist_will.i.am',
       'TimeSignature_0', 'TimeSignature_1', 'TimeSignature_3',
       'TimeSignature_4', 'TimeSignature_5', 'Explicit_False',
       'Explicit_True'],
      dtype='object', length=1007)

## Splitting the data

In [5]:
X = songAttributes_binary_encoded.drop(['popularityLabel', 'Popularity', 'Artist_t.A.T.u.', 'TimeSignature_1', 'Explicit_False'], axis=1)
y = songAttributes_binary_encoded['popularityLabel']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.7, random_state = 6162)
X_train.head()

Unnamed: 0,Acousticness,Danceability,Duration,Energy,Instrumentalness,Liveness,Loudness,Speechiness,Tempo,Valence,...,Artist_iio,Artist_k.d. lang,Artist_matchbox twenty,Artist_twenty one pilots,Artist_will.i.am,TimeSignature_0,TimeSignature_3,TimeSignature_4,TimeSignature_5,Explicit_True
48090,0.118,0.608,289173,0.312,2.9e-05,0.103,-8.711,0.0263,93.034,0.223,...,0,0,0,0,0,0,1,0,0,0
30590,0.0121,0.381,490960,0.965,0.0218,0.966,-3.676,0.125,136.505,0.328,...,0,0,0,0,0,0,0,1,0,0
36643,0.105,0.532,228587,0.466,7.7e-05,0.0737,-6.412,0.0279,159.956,0.43,...,0,0,0,0,0,0,1,0,0,0
137260,0.00304,0.582,567346,0.961,0.807,0.0859,-10.149,0.0381,135.995,0.828,...,0,0,0,0,0,0,0,1,0,0
147494,0.371,0.855,324189,0.694,0.0,0.109,-4.393,0.169,122.078,0.49,...,0,0,0,0,0,0,0,1,0,1


## Transform the data

In [7]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [9]:
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

## Create the model

In [10]:
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=1002))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [11]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               100300    
                                                                 
 dense_1 (Dense)             (None, 100)               10100     
                                                                 
 dense_2 (Dense)             (None, 2)                 202       
                                                                 
Total params: 110,602
Trainable params: 110,602
Non-trainable params: 0
_________________________________________________________________


## Fit the model

In [13]:
model.fit(X_train_scaled, y_train_categorical, epochs=100, shuffle=True, verbose=2)

Epoch 1/100
3390/3390 - 3s - loss: 0.5147 - accuracy: 0.7415 - 3s/epoch - 767us/step
Epoch 2/100
3390/3390 - 2s - loss: 0.4805 - accuracy: 0.7622 - 2s/epoch - 656us/step
Epoch 3/100
3390/3390 - 2s - loss: 0.4683 - accuracy: 0.7707 - 2s/epoch - 649us/step
Epoch 4/100
3390/3390 - 2s - loss: 0.4590 - accuracy: 0.7751 - 2s/epoch - 655us/step
Epoch 5/100
3390/3390 - 2s - loss: 0.4515 - accuracy: 0.7792 - 2s/epoch - 669us/step
Epoch 6/100
3390/3390 - 2s - loss: 0.4456 - accuracy: 0.7836 - 2s/epoch - 709us/step
Epoch 7/100
3390/3390 - 2s - loss: 0.4397 - accuracy: 0.7867 - 2s/epoch - 675us/step
Epoch 8/100
3390/3390 - 2s - loss: 0.4341 - accuracy: 0.7891 - 2s/epoch - 734us/step
Epoch 9/100
3390/3390 - 3s - loss: 0.4290 - accuracy: 0.7916 - 3s/epoch - 818us/step
Epoch 10/100
3390/3390 - 3s - loss: 0.4239 - accuracy: 0.7951 - 3s/epoch - 752us/step
Epoch 11/100
3390/3390 - 3s - loss: 0.4186 - accuracy: 0.7968 - 3s/epoch - 768us/step
Epoch 12/100
3390/3390 - 3s - loss: 0.4137 - accuracy: 0.7999 -

Epoch 97/100
3390/3390 - 2s - loss: 0.2339 - accuracy: 0.8923 - 2s/epoch - 697us/step
Epoch 98/100
3390/3390 - 2s - loss: 0.2330 - accuracy: 0.8920 - 2s/epoch - 712us/step
Epoch 99/100
3390/3390 - 2s - loss: 0.2315 - accuracy: 0.8933 - 2s/epoch - 718us/step
Epoch 100/100
3390/3390 - 2s - loss: 0.2309 - accuracy: 0.8942 - 2s/epoch - 709us/step


<keras.callbacks.History at 0x28f02432730>

## Evaluation of model

In [14]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1453/1453 - 1s - loss: 1.1926 - accuracy: 0.7261 - 849ms/epoch - 584us/step
Normal Neural Network - Loss: 1.1925851106643677, Accuracy: 0.7261402606964111


In [15]:
encoded_predictions = model.predict(X_test_scaled)
prediction_labels_num = np.argmax(encoded_predictions, axis=1)

In [16]:
prediction_labels = [str(x) for x in prediction_labels_num]
for x in range(len(prediction_labels)):
    if prediction_labels[x] == '0':
        prediction_labels[x] = 'Popular'
    else:
        prediction_labels[x] = 'Unpopular'

In [17]:
results = confusion_matrix(y_test, prediction_labels) 
print('Confusion Matrix:')
print(results) 
print('Accuracy Score:', accuracy_score(y_test, prediction_labels))
print('Report:')
print(classification_report(y_test, prediction_labels))

Confusion Matrix:
[[13848  6598]
 [ 6131 19903]]
Accuracy Score: 0.7261402753872633
Report:
              precision    recall  f1-score   support

     Popular       0.69      0.68      0.69     20446
   Unpopular       0.75      0.76      0.76     26034

    accuracy                           0.73     46480
   macro avg       0.72      0.72      0.72     46480
weighted avg       0.73      0.73      0.73     46480

