In [37]:
# Import the libraries
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns  # for nicer plots
sns.set(style="darkgrid")  # default style

import tensorflow as tf
from tensorflow import keras
from keras import metrics
tf.get_logger().setLevel('INFO')

from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, classification_report

In [38]:
#load data
X_train = pd.read_csv('../data/processed/X_train.csv')
Y_train = pd.read_csv('../data/processed/Y_train.csv')
X_val = pd.read_csv('../data/processed/X_val.csv')
Y_val = pd.read_csv('../data/processed/Y_val.csv')
X_test = pd.read_csv('../data/processed/X_test.csv')
Y_test = pd.read_csv('../data/processed/Y_test.csv')

### Implementing Baseline

In [39]:
#show class distribution
print(Y_train.value_counts())

music_category
0                 44460
3                  7242
2                  7190
4                  3625
1                  2958
5                  1755
6                  1171
Name: count, dtype: int64


Since class distribution isn't too unbalanced, implement majority baseline and stratified baseline.

In [40]:
#majority baseline implementation
dummy_clf = DummyClassifier(strategy = 'stratified', random_state = 42)
dummy_clf.fit(X_train, Y_train)
Y_pred = dummy_clf.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
report = classification_report(Y_test, Y_pred)

print(f'Baseline Classifier Accuracy: {accuracy}')
print(f'Classification Report: {report}')

Baseline Classifier Accuracy: 0.45344094039212246
Classification Report:               precision    recall  f1-score   support

           0       0.65      0.65      0.65     14744
           1       0.05      0.05      0.05      1033
           2       0.12      0.12      0.12      2388
           3       0.11      0.11      0.11      2407
           4       0.05      0.05      0.05      1154
           5       0.03      0.02      0.03       641
           6       0.02      0.02      0.02       432

    accuracy                           0.45     22799
   macro avg       0.15      0.15      0.15     22799
weighted avg       0.45      0.45      0.45     22799



### Building a Model

In [42]:
len(Y_train['music_category'].unique())

7

In [43]:
X_train.shape

(68401, 22)

In [44]:
def build_model(num_features, n_classes,learning_rate=0.01):
    """Build a TF logistic regression model using Keras.
    
    Args:
    learning_rate: The desired learning rate for SGD.
    
    Returns:
    model: A tf.keras model (graph).
    """
    # Random Seed + Clear Session
    tf.keras.backend.clear_session()
    np.random.seed(0)
    tf.random.set_seed(0)
    
    # Build a model using keras.Sequential.
    model = keras.Sequential(name = 'Genres')
    
    # Keras layers processing
    model.add(tf.keras.layers.InputLayer(input_shape=num_features))
    
    # This layer constructs the linear set of parameters for each input feature
    # (as well as a bias), and applies a sigmoid to the result. The result is
    # binary logistic regression.
    model.add(keras.layers.Dense(
      units=n_classes,
      activation='softmax'))

    # Use the SGD optimizer as usual.
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    
    # We specify the binary_crossentropy loss (equivalent to log loss).
    # Notice that we are including 'binary accuracy' as one of the metrics that we
    # ask Tensorflow to report when evaluating the model.
    model.compile(loss='sparse_categorical_crossentropy', 
                optimizer=optimizer, 
                metrics=['accuracy'])
    
    return model

In [46]:
model = build_model(num_features = X_train.shape[1], n_classes = len(Y_train['music_category'].unique()))
model.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 7)                 161       
                                                                 
Total params: 161 (644.00 Byte)
Trainable params: 161 (644.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [47]:
# Fit the model.
history = model.fit(
  x = X_train,   # training examples
  y = Y_train,   #labels
  epochs=5,             # number of passes through the training data
  batch_size=64,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history = pd.DataFrame(history.history)
display(history)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,,0.577047,,0.652975
1,,0.649659,,0.652975
2,,0.649659,,0.652975
3,,0.649659,,0.652975
4,,0.649659,,0.652975


### Feature Scaling & Normalization

In [48]:
X_train.describe()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,D,D#,E,F,F#,G,G#,A,A#,B
count,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,...,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0
mean,0.567052,0.641239,-8.253307,0.639327,0.084536,0.315946,0.15446,0.214324,0.474524,122.028761,...,0.103947,0.031301,0.079459,0.081959,0.068582,0.115351,0.06307,0.100044,0.06617,0.081681
std,0.173204,0.251079,5.033128,0.480199,0.105831,0.332552,0.308156,0.191372,0.259062,29.964368,...,0.305194,0.174132,0.270456,0.274304,0.252744,0.319447,0.243091,0.300061,0.24858,0.273881
min,0.0,0.0,-49.531,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.457,0.473,-10.0085,0.0,0.0358,0.0172,0.0,0.0981,0.261,99.0415,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.58,0.684,-7.001,1.0,0.0489,0.17,3.9e-05,0.132,0.464,122.013,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.695,0.853,-4.997,1.0,0.0843,0.599,0.0462,0.275,0.684,140.051,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.983,1.0,4.532,1.0,0.963,0.996,1.0,1.0,0.995,222.605,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [49]:
# YOUR CODE HERE
X_train_features_norm = (X_train - X_train.mean())/X_train.std() 
X_test_features_norm = (Y_test - X_train.mean())/X_train.std()
#only train features shown since we're not supposed to look at test
X_train_features_norm.describe()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,D,D#,E,F,F#,G,G#,A,A#,B
count,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,...,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0,68400.0
mean,2.9294310000000004e-17,-3.119532e-16,3.022923e-16,7.842979e-18,-1.006602e-16,-1.038805e-18,-1.8698490000000002e-18,-5.0693690000000005e-17,-3.0644750000000005e-17,2.04333e-16,...,-7.853367000000001e-17,-2.295759e-17,-3.5527140000000005e-17,-5.2044140000000004e-17,-4.6538470000000005e-17,-8.310441e-19,-5.651100000000001e-17,-3.0372070000000004e-17,-1.558208e-17,-1.9529540000000003e-17
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-3.273905,-2.553939,-8.201201,-1.33138,-0.7987817,-0.9500644,-0.5012398,-1.119934,-1.831698,-4.072462,...,-0.3405939,-0.1797557,-0.2937968,-0.2987888,-0.2713496,-0.3610952,-0.259451,-0.3334121,-0.2661902,-0.2982369
25%,-0.6353934,-0.6700669,-0.3487281,-1.33138,-0.4605054,-0.8983432,-0.5012398,-0.6073187,-0.8242186,-0.7671532,...,-0.3405939,-0.1797557,-0.2937968,-0.2987888,-0.2713496,-0.3610952,-0.259451,-0.3334121,-0.2661902,-0.2982369
50%,0.07475318,0.1703073,0.2488128,0.7510894,-0.3367228,-0.4388666,-0.5011142,-0.4301765,-0.04062375,-0.000525979,...,-0.3405939,-0.1797557,-0.2937968,-0.2987888,-0.2713496,-0.3610952,-0.259451,-0.3334121,-0.2661902,-0.2982369
75%,0.7387114,0.8434033,0.6469748,0.7510894,-0.002226178,0.8511561,-0.3513159,0.3170606,0.8085923,0.6014557,...,-0.3405939,-0.1797557,-0.2937968,-0.2987888,-0.2713496,-0.3610952,-0.259451,-0.3334121,-0.2661902,-0.2982369
max,2.401494,1.428877,2.540231,0.7510894,8.30066,2.044953,2.743865,4.1055,2.009075,3.356528,...,2.936005,5.563024,3.403664,3.346797,3.685229,2.769312,3.854236,2.999247,3.756658,3.35299


### Model 2

In [50]:
def build_model2(num_features, n_classes,learning_rate=0.01):
    """Build a TF logistic regression model using Keras.
    
    Args:
    learning_rate: The desired learning rate for SGD.
    
    Returns:
    model: A tf.keras model (graph).
    """
    # Random Seed + Clear Session
    tf.keras.backend.clear_session()
    np.random.seed(0)
    tf.random.set_seed(0)
    
    # Build a model using keras.Sequential.
    model = keras.Sequential(name = 'Genres')
    
    # Keras layers processing
    model.add(tf.keras.layers.InputLayer(input_shape=num_features))

    #adding extra layer
    model.add(keras.layers.Dense(
            units=256,
            activation = 'relu'))

    #add extra layer
    model.add(keras.layers.Dense(
            units=128,
            activation = 'relu'))
    
    # This layer constructs the linear set of parameters for each input feature
    # (as well as a bias), and applies a sigmoid to the result. The result is
    # binary logistic regression.
    model.add(keras.layers.Dense(
      units=n_classes,
      activation='softmax'))

    # Use the SGD optimizer as usual.
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    
    # We specify the binary_crossentropy loss (equivalent to log loss).
    # Notice that we are including 'binary accuracy' as one of the metrics that we
    # ask Tensorflow to report when evaluating the model.
    model.compile(loss='sparse_categorical_crossentropy', 
                optimizer=optimizer, 
                metrics=['accuracy'])
    
    return model

In [51]:
model2 = build_model2(num_features = X_train.shape[1], n_classes = len(Y_train['music_category'].unique()))
model2.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               5888      
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 7)                 903       
                                                                 
Total params: 39687 (155.03 KB)
Trainable params: 39687 (155.03 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [52]:
# Fit the model.
history2 = model2.fit(
  x = X_train,   # training examples
  y = Y_train,   #labels
  epochs=10,             # number of passes through the training data
  batch_size=128,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history2 = pd.DataFrame(history2.history)
display(history)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,,0.577047,,0.652975
1,,0.649659,,0.652975
2,,0.649659,,0.652975
3,,0.649659,,0.652975
4,,0.649659,,0.652975


### Testing - Feature Selection

In [53]:
X_train.columns

Index(['danceability', 'energy', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'C',
       'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'],
      dtype='object')

In [54]:
features = ['danceability','energy', 'loudness', 'valence', 'tempo']
X_train = X_train[features]

In [55]:
X_train.head()

Unnamed: 0,danceability,energy,loudness,valence,tempo
0,0.689,0.59,-9.272,0.872,93.914
1,0.584,0.626,-5.068,0.835,112.36
2,0.406,0.822,-4.172,0.534,127.688
3,0.728,0.514,-6.499,0.871,170.055
4,0.483,0.491,-5.389,0.4,111.956


In [56]:
model2 = build_model2(num_features = len(features), n_classes = len(Y_train['music_category'].unique()))
model2.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               1536      
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 7)                 903       
                                                                 
Total params: 35335 (138.03 KB)
Trainable params: 35335 (138.03 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [57]:
# Fit the model.
history2 = model2.fit(
  x = X_train,   # training examples
  y = Y_train,   #labels
  epochs=10,             # number of passes through the training data
  batch_size=128,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history2 = pd.DataFrame(history2.history)
display(history)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,,0.577047,,0.652975
1,,0.649659,,0.652975
2,,0.649659,,0.652975
3,,0.649659,,0.652975
4,,0.649659,,0.652975
