In [1]:
# Import the libraries
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns  # for nicer plots
sns.set(style="darkgrid")  # default style

import tensorflow as tf
from tensorflow import keras
from keras import metrics
tf.get_logger().setLevel('INFO')

from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, classification_report




In [2]:
#load data
X_train = pd.read_csv('../data/processed/X_train.csv')
Y_train = pd.read_csv('../data/processed/Y_train.csv')
X_val = pd.read_csv('../data/processed/X_val.csv')
Y_val = pd.read_csv('../data/processed/Y_val.csv')
X_test = pd.read_csv('../data/processed/X_test.csv')
Y_test = pd.read_csv('../data/processed/Y_test.csv')

### Implementing Baseline

In [3]:
#show class distribution
class_counts = Y_train.value_counts().reset_index()
class_counts

Unnamed: 0,music_category,count
0,2,18244
1,1,6504
2,5,4164
3,0,4163
4,3,3338
5,4,1993
6,10,1689
7,6,1338
8,9,1034
9,7,840


Since class distribution isn't too unbalanced, implement majority baseline and stratified baseline. ## EDIT class majority big now

In [4]:
#majority baseline implementation
dummy_clf = DummyClassifier(strategy = 'stratified', random_state = 42)
dummy_clf.fit(X_train, Y_train)
Y_pred = dummy_clf.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
report = classification_report(Y_test, Y_pred)

print(f'Baseline Classifier Accuracy: {accuracy}')
print(f'Classification Report: {report}')

Baseline Classifier Accuracy: 0.22416033928449278
Classification Report:               precision    recall  f1-score   support

           0       0.10      0.09      0.10      1456
           1       0.14      0.14      0.14      2098
           2       0.42      0.42      0.42      6072
           3       0.08      0.07      0.08      1135
           4       0.05      0.05      0.05       672
           5       0.08      0.09      0.09      1329
           6       0.03      0.03      0.03       477
           7       0.01      0.01      0.01       284
           8       0.01      0.01      0.01       199
           9       0.03      0.03      0.03       325
          10       0.04      0.04      0.04       572

    accuracy                           0.22     14619
   macro avg       0.09      0.09      0.09     14619
weighted avg       0.22      0.22      0.22     14619



#### Testing for uniform distribution of classes

In [5]:
min_count = class_counts['count'].min()
result = {}
unique_classes = np.unique(Y_train)
for value in unique_classes:
    indices = np.where(Y_train==value)[0][:min_count]
    result[value] = indices.tolist()
index_values = list(result.values())
index_values = [element for nestedlist in index_values for element in nestedlist]
X_train_uniform = X_train.iloc[index_values]
Y_train_uniform = Y_train.iloc[index_values]

In [6]:
X_train_uniform.info(20)

<class 'pandas.core.frame.DataFrame'>
Index: 6094 entries, 3 to 15074
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      6094 non-null   float64
 1   energy            6094 non-null   float64
 2   loudness          6094 non-null   float64
 3   mode              6094 non-null   int64  
 4   speechiness       6094 non-null   float64
 5   acousticness      6094 non-null   float64
 6   instrumentalness  6094 non-null   float64
 7   liveness          6094 non-null   float64
 8   valence           6094 non-null   float64
 9   tempo             6094 non-null   float64
 10  C                 6094 non-null   int64  
 11  C#                6094 non-null   int64  
 12  D                 6094 non-null   int64  
 13  D#                6094 non-null   int64  
 14  E                 6094 non-null   int64  
 15  F                 6094 non-null   int64  
 16  F#                6094 non-null   int64  
 17 

### Building a Model

In [7]:
len(Y_train['music_category'].unique())

11

In [8]:
X_train.shape

(43861, 22)

In [9]:
def build_model(num_features, n_classes,learning_rate=0.01):
    """Build a TF logistic regression model using Keras.
    
    Args:
    learning_rate: The desired learning rate for SGD.
    
    Returns:
    model: A tf.keras model (graph).
    """
    # Random Seed + Clear Session
    tf.keras.backend.clear_session()
    np.random.seed(0)
    tf.random.set_seed(0)
    
    # Build a model using keras.Sequential.
    model = keras.Sequential(name = 'Genres')
    
    # Keras layers processing
    model.add(tf.keras.layers.InputLayer(input_shape=num_features))
    
    # This layer constructs the linear set of parameters for each input feature
    # (as well as a bias), and applies a sigmoid to the result. The result is
    # binary logistic regression.
    model.add(keras.layers.Dense(
      units=n_classes,
      activation='softmax'))

    # Use the SGD optimizer as usual.
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    
    # We specify the binary_crossentropy loss (equivalent to log loss).
    # Notice that we are including 'binary accuracy' as one of the metrics that we
    # ask Tensorflow to report when evaluating the model.
    model.compile(loss='sparse_categorical_crossentropy', 
                optimizer=optimizer, 
                metrics=['accuracy'])
    
    return model

In [10]:
model = build_model(num_features = X_train.shape[1], n_classes = len(Y_train['music_category'].unique()))
model.summary()


Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 11)                253       
                                                                 
Total params: 253 (1012.00 Byte)
Trainable params: 253 (1012.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
# Fit the model.
history = model.fit(
  x = X_train,   # training examples
  y = Y_train,   #labels
  epochs=5,             # number of passes through the training data
  batch_size=64,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history = pd.DataFrame(history.history)
display(history)

Epoch 1/5


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,51.576462,0.234965,31.327288,0.155231
1,47.997734,0.241653,31.760582,0.401641
2,46.617039,0.244287,30.507904,0.33736
3,45.026958,0.248746,60.327801,0.156143
4,45.787926,0.246922,50.431835,0.197173


##### TESTING for EVENLY distributed classes

In [12]:
model = build_model(num_features = X_train_uniform.shape[1], n_classes = len(Y_train['music_category'].unique()))
model.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 11)                253       
                                                                 
Total params: 253 (1012.00 Byte)
Trainable params: 253 (1012.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# Fit the model.
history = model.fit(
  x = X_train_uniform,   # training examples
  y = Y_train_uniform,   #labels
  epochs=5,             # number of passes through the training data
  batch_size=64,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history = pd.DataFrame(history.history)
display(history)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,63.022778,0.106127,59.820801,0.0
1,59.890419,0.101204,40.252224,0.0
2,55.38031,0.128556,44.255928,0.0
3,55.366337,0.11798,53.5616,0.0
4,54.627541,0.138767,45.640556,0.091803


### Feature Scaling & Normalization

In [14]:
X_train.describe()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,D,D#,E,F,F#,G,G#,A,A#,B
count,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,...,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0
mean,0.559497,0.631523,-8.680851,0.634368,0.090135,0.336522,0.189773,0.219151,0.468492,122.162031,...,0.105059,0.030004,0.080094,0.08112,0.067463,0.120357,0.059825,0.102825,0.065548,0.077723
std,0.179313,0.260233,5.378897,0.481613,0.119129,0.342108,0.336052,0.197811,0.265692,30.281405,...,0.306633,0.1706,0.271442,0.273022,0.250825,0.325383,0.237166,0.303733,0.247493,0.267738
min,0.0,2e-05,-49.531,0.0,0.0,0.0,0.0,0.00925,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.447,0.449,-10.555,0.0,0.0362,0.0168,0.0,0.0982,0.244,99.653,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.575,0.674,-7.288,1.0,0.0492,0.201,8.2e-05,0.132,0.458,122.023,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.691,0.855,-5.185,1.0,0.0879,0.642,0.178,0.282,0.684,140.279,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.984,1.0,3.156,1.0,0.965,0.996,1.0,0.995,0.994,243.372,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
# YOUR CODE HERE
X_train_features_norm = (X_train - X_train.mean())/X_train.std() 
X_test_features_norm = (X_test - X_train.mean())/X_train.std()
#only train features shown since we're not supposed to look at test
X_train_features_norm.describe()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,D,D#,E,F,F#,G,G#,A,A#,B
count,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,...,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0,43861.0
mean,2.265553e-16,1.840306e-16,-1.31786e-16,9.152930000000001e-17,-1.084582e-16,9.719925e-18,3.758371e-17,-8.310536e-17,-8.990931e-18,-2.230723e-16,...,3.693572e-17,1.2878900000000002e-17,-3.337174e-17,-3.5963720000000005e-17,2.8268780000000005e-17,-6.317951e-17,-6.917347e-17,-2.3327820000000003e-17,2.1343340000000002e-17,2.6972790000000004e-17
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-3.120231,-2.426687,-7.594522,-1.317174,-0.7566175,-0.9836694,-0.564715,-1.061119,-1.763288,-4.034226,...,-0.3426217,-0.1758728,-0.2950687,-0.2971183,-0.2689647,-0.369895,-0.2522513,-0.3385364,-0.2648477,-0.2902943
25%,-0.6273785,-0.7013853,-0.3484262,-1.317174,-0.4527447,-0.9345622,-0.564715,-0.6114464,-0.8449324,-0.7433285,...,-0.3426217,-0.1758728,-0.2950687,-0.2971183,-0.2689647,-0.369895,-0.2522513,-0.3385364,-0.2648477,-0.2902943
50%,0.08645854,0.1632256,0.2589474,0.7591836,-0.3436191,-0.3961364,-0.5644724,-0.4405759,-0.03948902,-0.004591286,...,-0.3426217,-0.1758728,-0.2950687,-0.2971183,-0.2689647,-0.369895,-0.2522513,-0.3385364,-0.2648477,-0.2902943
75%,0.7333734,0.8587571,0.6499198,0.7591836,-0.01876059,0.8929285,-0.03503475,0.3177252,0.8111194,0.5982869,...,-0.3426217,-0.1758728,-0.2950687,-0.2971183,-0.2689647,-0.369895,-0.2522513,-0.3385364,-0.2648477,-0.2902943
max,2.367391,1.415951,2.20061,0.7591836,7.343859,1.927688,2.411017,3.922183,1.977883,4.002786,...,2.918604,5.685797,3.388964,3.365587,3.717875,2.703408,3.96421,2.953824,3.775669,3.444701


### Model 2

In [16]:
def build_model2(num_features, n_classes,learning_rate=0.01):
    """Build a TF logistic regression model using Keras.
    
    Args:
    learning_rate: The desired learning rate for SGD.
    
    Returns:
    model: A tf.keras model (graph).
    """
    # Random Seed + Clear Session
    tf.keras.backend.clear_session()
    np.random.seed(0)
    tf.random.set_seed(0)
    
    # Build a model using keras.Sequential.
    model = keras.Sequential(name = 'Genres')
    
    # Keras layers processing
    model.add(tf.keras.layers.InputLayer(input_shape=num_features))

    #adding extra layer
    model.add(keras.layers.Dense(
            units=256,
            activation = 'relu'))

    #add extra layer
    model.add(keras.layers.Dense(
            units=128,
            activation = 'relu'))
    
    # This layer constructs the linear set of parameters for each input feature
    # (as well as a bias), and applies a sigmoid to the result. The result is
    # binary logistic regression.
    model.add(keras.layers.Dense(
      units=n_classes,
      activation='softmax'))

    # Use the SGD optimizer as usual.
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    
    # We specify the binary_crossentropy loss (equivalent to log loss).
    # Notice that we are including 'binary accuracy' as one of the metrics that we
    # ask Tensorflow to report when evaluating the model.
    model.compile(loss='sparse_categorical_crossentropy', 
                optimizer=optimizer, 
                metrics=['accuracy'])
    
    return model

In [17]:
model2 = build_model2(num_features = X_train_features_norm.shape[1], n_classes = len(Y_train['music_category'].unique()))
model2.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               5888      
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 11)                1419      
                                                                 
Total params: 40203 (157.04 KB)
Trainable params: 40203 (157.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [18]:
# Fit the model.
history2 = model2.fit(
  x = X_train_features_norm,   # training examples
  y = Y_train,   #labels
  epochs=10,             # number of passes through the training data
  batch_size=128,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history2 = pd.DataFrame(history2.history)
display(history2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,1.879662,0.400111,1.737988,0.437429
1,1.684094,0.449992,1.638613,0.462959
2,1.605777,0.467675,1.577859,0.472988
3,1.557926,0.478087,1.539026,0.485525
4,1.527143,0.483888,1.513783,0.49054
5,1.505979,0.488549,1.49479,0.49282
6,1.49007,0.491463,1.481777,0.496011
7,1.477512,0.493413,1.469659,0.495783
8,1.467154,0.494604,1.460514,0.498518
9,1.458193,0.495617,1.452066,0.499202


### Testing - Feature Selection

In [19]:
X_train_features_norm.columns

Index(['danceability', 'energy', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'C',
       'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'],
      dtype='object')

In [20]:
features = ['danceability','energy', 'loudness', 'valence', 'tempo']
X_train_features_norm = X_train_features_norm[features]

In [21]:
X_train_features_norm.head()

Unnamed: 0,danceability,energy,loudness,valence,tempo
0,0.822603,-0.505407,0.539302,0.43098,0.257847
1,-0.281614,-0.209518,-0.300833,-0.94279,-1.258661
2,0.939717,-0.451609,0.405632,1.514941,1.581597
3,-2.339472,1.269928,0.636906,-1.508482,-0.932917
4,-0.103154,0.543654,0.559567,0.31054,0.915577


In [22]:
model2 = build_model2(num_features = len(features), n_classes = len(Y_train['music_category'].unique()))
model2.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               1536      
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 11)                1419      
                                                                 
Total params: 35851 (140.04 KB)
Trainable params: 35851 (140.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
# Fit the model.
history2 = model2.fit(
  x = X_train_features_norm,   # training examples
  y = Y_train,   #labels
  epochs=10,             # number of passes through the training data
  batch_size=128,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history2 = pd.DataFrame(history2.history)
display(history2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,1.953322,0.40989,1.807134,0.423296
1,1.759467,0.433703,1.722408,0.439024
2,1.694182,0.443076,1.671912,0.444267
3,1.653819,0.451259,1.639282,0.453613
4,1.627855,0.45554,1.617747,0.455437
5,1.610356,0.457871,1.602321,0.455892
6,1.597411,0.458732,1.591444,0.456576
7,1.587393,0.459492,1.582347,0.45612
8,1.579276,0.459163,1.575149,0.456348
9,1.572511,0.459568,1.568691,0.45612


In [24]:
X_test_features_norm = X_test_features_norm[features]
model2.predict(X_test_features_norm)



array([[1.7805724e-01, 3.2493941e-02, 2.7243325e-01, ..., 9.7512370e-03,
        2.6788259e-02, 7.6217271e-02],
       [4.7449020e-04, 7.4014475e-04, 9.2279792e-01, ..., 1.0444897e-06,
        4.5740462e-06, 4.1233111e-04],
       [3.1818338e-02, 4.0612927e-01, 3.3962837e-01, ..., 2.0707579e-02,
        2.4793874e-02, 1.0751469e-02],
       ...,
       [4.9012177e-02, 2.5576057e-02, 7.4208909e-01, ..., 8.2088076e-04,
        2.6559534e-03, 1.6081037e-02],
       [9.9536054e-02, 3.8944924e-01, 3.0424845e-01, ..., 6.1505740e-03,
        9.3463715e-03, 5.4412908e-03],
       [5.6533981e-02, 3.7766442e-01, 4.0142259e-01, ..., 7.4691400e-03,
        9.7278552e-03, 9.9529522e-03]], dtype=float32)

In [25]:
print("Evaluate on test data")
results = model2.evaluate(X_test_features_norm, Y_test, batch_size=128)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [1.580074667930603, 0.456392377614975]
