In [1]:
# Import the libraries
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns  # for nicer plots
sns.set(style="darkgrid")  # default style

import tensorflow as tf
from tensorflow import keras
from keras import metrics
tf.get_logger().setLevel('INFO')

from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, classification_report




In [2]:
#load data
X_train = pd.read_csv('../data/processed/X_train.csv')
Y_train = pd.read_csv('../data/processed/Y_train.csv')
X_val = pd.read_csv('../data/processed/X_val.csv')
Y_val = pd.read_csv('../data/processed/Y_val.csv')
X_test = pd.read_csv('../data/processed/X_test.csv')
Y_test = pd.read_csv('../data/processed/Y_test.csv')

### Implementing Baseline

In [3]:
#show class distribution
class_counts = Y_train.value_counts().reset_index()
class_counts

Unnamed: 0,music_category,count
0,0,11662
1,2,5106
2,6,2646
3,5,2026
4,4,1787
5,7,1687
6,1,1292
7,3,1010
8,9,533
9,10,356


Since class distribution isn't too unbalanced, implement majority baseline and stratified baseline. ## EDIT class majority big now

In [4]:
#majority baseline implementation
dummy_clf = DummyClassifier(strategy = 'stratified', random_state = 42)
dummy_clf.fit(X_train, Y_train)
Y_pred = dummy_clf.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
report = classification_report(Y_test, Y_pred)

print(f'Baseline Classifier Accuracy: {accuracy}')
print(f'Classification Report: {report}')

Baseline Classifier Accuracy: 0.22592905405405406
Classification Report:               precision    recall  f1-score   support

           0       0.41      0.42      0.41      3894
           1       0.04      0.03      0.03       462
           2       0.18      0.17      0.17      1695
           3       0.03      0.04      0.04       321
           4       0.06      0.07      0.06       538
           5       0.07      0.07      0.07       651
           6       0.10      0.10      0.10       917
           7       0.05      0.05      0.05       556
           8       0.02      0.02      0.02       117
           9       0.02      0.02      0.02       203
          10       0.00      0.00      0.00       118

    accuracy                           0.23      9472
   macro avg       0.09      0.09      0.09      9472
weighted avg       0.23      0.23      0.23      9472



#### Testing for uniform distribution of classes

In [5]:
min_count = class_counts['count'].min()
result = {}
unique_classes = np.unique(Y_train)
for value in unique_classes:
    indices = np.where(Y_train==value)[0][:min_count]
    result[value] = indices.tolist()
index_values = list(result.values())
index_values = [element for nestedlist in index_values for element in nestedlist]
X_train_uniform = X_train.iloc[index_values]
Y_train_uniform = Y_train.iloc[index_values]

In [6]:
X_train_uniform.info(20)

<class 'pandas.core.frame.DataFrame'>
Index: 3432 entries, 3 to 24809
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      3432 non-null   float64
 1   energy            3432 non-null   float64
 2   loudness          3432 non-null   float64
 3   mode              3432 non-null   float64
 4   speechiness       3432 non-null   float64
 5   acousticness      3432 non-null   float64
 6   instrumentalness  3432 non-null   float64
 7   liveness          3432 non-null   float64
 8   valence           3432 non-null   float64
 9   tempo             3432 non-null   float64
 10  C                 3432 non-null   float64
 11  C#                3432 non-null   float64
 12  D                 3432 non-null   float64
 13  D#                3432 non-null   float64
 14  E                 3432 non-null   float64
 15  F                 3432 non-null   float64
 16  F#                3432 non-null   float64
 17 

### Building a Model

In [7]:
len(Y_train['music_category'].unique())

11

In [8]:
X_train.shape

(28417, 22)

In [9]:
def build_model(num_features, n_classes,learning_rate=0.01):
    """Build a TF logistic regression model using Keras.
    
    Args:
    learning_rate: The desired learning rate for SGD.
    
    Returns:
    model: A tf.keras model (graph).
    """
    # Random Seed + Clear Session
    tf.keras.backend.clear_session()
    np.random.seed(0)
    tf.random.set_seed(0)
    
    # Build a model using keras.Sequential.
    model = keras.Sequential(name = 'Genres')
    
    # Keras layers processing
    model.add(tf.keras.layers.InputLayer(input_shape=num_features))
    
    # This layer constructs the linear set of parameters for each input feature
    # (as well as a bias), and applies a sigmoid to the result. The result is
    # binary logistic regression.
    model.add(keras.layers.Dense(
      units=n_classes,
      activation='softmax'))

    # Use the SGD optimizer as usual.
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    
    # We specify the binary_crossentropy loss (equivalent to log loss).
    # Notice that we are including 'binary accuracy' as one of the metrics that we
    # ask Tensorflow to report when evaluating the model.
    model.compile(loss='sparse_categorical_crossentropy', 
                optimizer=optimizer, 
                metrics=['accuracy'])
    
    return model

In [10]:
model = build_model(num_features = X_train.shape[1], n_classes = len(Y_train['music_category'].unique()))
model.summary()


Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 11)                253       
                                                                 
Total params: 253 (1012.00 Byte)
Trainable params: 253 (1012.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
# Fit the model.
history = model.fit(
  x = X_train,   # training examples
  y = Y_train,   #labels
  epochs=5,             # number of passes through the training data
  batch_size=64,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history = pd.DataFrame(history.history)
display(history)

Epoch 1/5


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,53.496414,0.232532,65.18792,0.179099
1,50.150688,0.236637,29.540846,0.376847
2,47.016582,0.24434,74.336624,0.179099
3,48.516682,0.240782,50.479492,0.095004
4,47.28849,0.243363,45.55571,0.409219


##### TESTING for EVENLY distributed classes

In [12]:
model = build_model(num_features = X_train_uniform.shape[1], n_classes = len(Y_train['music_category'].unique()))
model.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 11)                253       
                                                                 
Total params: 253 (1012.00 Byte)
Trainable params: 253 (1012.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [13]:
# Fit the model.
history = model.fit(
  x = X_train_uniform,   # training examples
  y = Y_train_uniform,   #labels
  epochs=5,             # number of passes through the training data
  batch_size=64,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history = pd.DataFrame(history.history)
display(history)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,65.320732,0.099093,35.850525,0.0
1,62.867973,0.100389,41.044411,0.0
2,63.715942,0.096826,43.294044,0.0
3,65.129005,0.097798,52.317127,0.0
4,63.086445,0.09715,29.559252,0.0


### Feature Scaling & Normalization

In [14]:
X_train.describe()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,D,D#,E,F,F#,G,G#,A,A#,B
count,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,...,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0
mean,0.563809,0.656935,-8.183055,0.633952,0.100604,0.297483,0.189521,0.210896,0.458677,123.132188,...,0.101066,0.029243,0.07865,0.080269,0.07221,0.115424,0.063554,0.098321,0.067108,0.07872
std,0.17926,0.255036,4.942298,0.481731,0.137034,0.330553,0.33266,0.184895,0.259785,29.673745,...,0.301422,0.16849,0.269197,0.271714,0.25884,0.319538,0.24396,0.297754,0.250213,0.269307
min,0.0,2e-05,-40.558,0.0,0.0,0.0,0.0,0.0116,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.448,0.481,-10.104,0.0,0.0365,0.00834,0.0,0.0974,0.242,100.013,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.574,0.702,-6.95,1.0,0.0521,0.14,0.000126,0.131,0.443,122.392,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.696,0.877,-4.899,1.0,0.0993,0.569,0.192,0.275,0.666,141.868,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.984,1.0,4.532,1.0,0.963,0.996,0.999,0.994,0.995,220.081,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
# YOUR CODE HERE
X_train_features_norm = (X_train - X_train.mean())/X_train.std() 
X_test_features_norm = (X_test - X_train.mean())/X_train.std()
#only train features shown since we're not supposed to look at test
X_train_features_norm.describe()

Unnamed: 0,danceability,energy,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,D,D#,E,F,F#,G,G#,A,A#,B
count,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,...,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0,28417.0
mean,-4.870807e-16,-4.150688e-17,-4.0506710000000005e-17,-1.1876970000000002e-17,-5.000829e-18,-2.142855e-16,-8.351384000000001e-17,5.300878e-17,-1.620268e-16,1.250207e-16,...,-4.8633060000000005e-17,-2.312883e-17,-4.650771e-17,-1.5002490000000002e-17,-3.6756090000000006e-17,4.3757250000000003e-17,4.1006790000000005e-17,-4.8508040000000006e-17,-5.375891e-18,-4.975824e-17
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-3.145199,-2.575772,-6.550585,-1.315985,-0.7341533,-0.8999554,-0.5697121,-1.077886,-1.765603,-4.149533,...,-0.3352985,-0.1735595,-0.292166,-0.295417,-0.2789762,-0.3612206,-0.2605077,-0.3302103,-0.2682025,-0.2923079
25%,-0.6460406,-0.6898438,-0.3886744,-1.315985,-0.4677962,-0.874725,-0.5697121,-0.6138398,-0.834063,-0.7791126,...,-0.3352985,-0.1735595,-0.292166,-0.295417,-0.2789762,-0.3612206,-0.2605077,-0.3302103,-0.2682025,-0.2923079
50%,0.05684784,0.1766999,0.2494903,0.7598601,-0.353956,-0.4764233,-0.5693333,-0.4321153,-0.06034582,-0.0249442,...,-0.3352985,-0.1735595,-0.292166,-0.295417,-0.2789762,-0.3612206,-0.2605077,-0.3302103,-0.2682025,-0.2923079
75%,0.7374224,0.8628771,0.6644794,0.7598601,-0.009516095,0.8214003,0.007453161,0.3467043,0.7980568,0.6313936,...,-0.3352985,-0.1735595,-0.292166,-0.295417,-0.2789762,-0.3612206,-0.2605077,-0.3302103,-0.2682025,-0.2923079
max,2.344025,1.345162,2.572701,0.7598601,6.293295,2.113173,2.433351,4.235394,2.064489,3.267158,...,2.982312,5.761509,3.422591,3.384927,3.584409,2.768294,3.838522,3.028267,3.728395,3.42093


### Model 2

In [16]:
def build_model2(num_features, n_classes,learning_rate=0.01):
    """Build a TF logistic regression model using Keras.
    
    Args:
    learning_rate: The desired learning rate for SGD.
    
    Returns:
    model: A tf.keras model (graph).
    """
    # Random Seed + Clear Session
    tf.keras.backend.clear_session()
    np.random.seed(0)
    tf.random.set_seed(0)
    
    # Build a model using keras.Sequential.
    model = keras.Sequential(name = 'Genres')
    
    # Keras layers processing
    model.add(tf.keras.layers.InputLayer(input_shape=num_features))

    #adding extra layer
    model.add(keras.layers.Dense(
            units=256,
            activation = 'relu'))

    #add extra layer
    model.add(keras.layers.Dense(
            units=128,
            activation = 'relu'))
    
    # This layer constructs the linear set of parameters for each input feature
    # (as well as a bias), and applies a sigmoid to the result. The result is
    # binary logistic regression.
    model.add(keras.layers.Dense(
      units=n_classes,
      activation='softmax'))

    # Use the SGD optimizer as usual.
    optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    
    # We specify the binary_crossentropy loss (equivalent to log loss).
    # Notice that we are including 'binary accuracy' as one of the metrics that we
    # ask Tensorflow to report when evaluating the model.
    model.compile(loss='sparse_categorical_crossentropy', 
                optimizer=optimizer, 
                metrics=['accuracy'])
    
    return model

In [17]:
model2 = build_model2(num_features = X_train_features_norm.shape[1], n_classes = len(Y_train['music_category'].unique()))
model2.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               5888      
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 11)                1419      
                                                                 
Total params: 40203 (157.04 KB)
Trainable params: 40203 (157.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
# Fit the model.
history2 = model2.fit(
  x = X_train_features_norm,   # training examples
  y = Y_train,   #labels
  epochs=10,             # number of passes through the training data
  batch_size=128,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history2 = pd.DataFrame(history2.history)
display(history2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,1.538801,0.461036,1.55143,0.459536
1,1.531638,0.461662,1.544807,0.461647
2,1.525633,0.463382,1.53993,0.461295
3,1.520331,0.462952,1.534577,0.466573
4,1.515589,0.464907,1.529871,0.464462
5,1.511412,0.464712,1.526647,0.464462
6,1.507669,0.465572,1.523028,0.464462
7,1.504312,0.465337,1.520081,0.464462
8,1.501311,0.465846,1.5173,0.463406
9,1.498482,0.465455,1.514773,0.465869


### Testing - Feature Selection

In [19]:
X_train_features_norm.columns

Index(['danceability', 'energy', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'C',
       'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'],
      dtype='object')

In [20]:
features = ['danceability','energy', 'loudness', 'valence', 'tempo']
X_train_features_norm = X_train_features_norm[features]

In [21]:
X_train_features_norm.head()

Unnamed: 0,danceability,energy,loudness,valence,tempo
0,1.072131,-0.544766,0.254751,-0.726281,-0.542472
1,-0.958435,0.243357,0.501195,1.367759,0.576463
2,0.246516,0.541354,0.649304,1.733446,-0.112395
3,1.819647,0.133569,-0.211227,0.925085,-0.779517
4,1.088867,-0.685923,-0.189374,1.221484,-1.453682


In [22]:
model2 = build_model2(num_features = len(features), n_classes = len(Y_train['music_category'].unique()))
model2.summary()

Model: "Genres"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               1536      
                                                                 
 dense_1 (Dense)             (None, 128)               32896     
                                                                 
 dense_2 (Dense)             (None, 11)                1419      
                                                                 
Total params: 35851 (140.04 KB)
Trainable params: 35851 (140.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
# Fit the model.
history2 = model2.fit(
  x = X_train_features_norm,   # training examples
  y = Y_train,   #labels
  epochs=10,             # number of passes through the training data
  batch_size=128,        # mini-batch size for SGD
  validation_split=0.1, # use a fraction of the examples for validation -- DO WE STILL NEED THIS IF VALIDATION SPLIT
  verbose=1             # display some progress output during training
  )

# Convert the return value into a DataFrame so we can see the train loss 
# and binary accuracy after every epoch.
history2 = pd.DataFrame(history2.history)
display(history2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,2.003763,0.394252,1.853274,0.409571
1,1.79573,0.414194,1.765546,0.416256
2,1.721827,0.428778,1.703665,0.427164
3,1.668113,0.44434,1.659156,0.438424
4,1.630212,0.452825,1.628153,0.444757
5,1.603473,0.455562,1.606656,0.452146
6,1.583848,0.457126,1.590304,0.455313
7,1.568849,0.458964,1.577714,0.454609
8,1.556918,0.459668,1.567243,0.457776
9,1.54703,0.460762,1.558619,0.459536


In [24]:
X_test_features_norm = X_test_features_norm[features]
model2.predict(X_test_features_norm)



array([[3.92012268e-01, 1.19313225e-01, 1.42258465e-01, ...,
        2.29995716e-02, 4.59272563e-02, 1.31064253e-02],
       [2.38196760e-01, 8.38955212e-03, 2.99421072e-01, ...,
        3.66132590e-03, 5.28359879e-03, 2.78711156e-03],
       [3.53327572e-01, 3.53052877e-02, 3.63502175e-01, ...,
        1.30118579e-02, 5.88608123e-02, 2.04168241e-02],
       ...,
       [4.19900566e-01, 1.31234556e-01, 9.30924192e-02, ...,
        2.28006411e-02, 2.69502196e-02, 8.35101400e-03],
       [4.54893827e-01, 2.34331377e-02, 9.40177590e-02, ...,
        9.22742300e-03, 1.46679934e-02, 2.83613689e-02],
       [7.52183199e-01, 1.45415461e-03, 1.15192533e-01, ...,
        4.75296925e-04, 1.96069060e-03, 6.00922741e-02]], dtype=float32)

In [25]:
print("Evaluate on test data")
results = model2.evaluate(X_test_features_norm, Y_test, batch_size=128)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [1.548405647277832, 0.4649493098258972]
