In [33]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv1D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns

#load dataset
data_tmp = pd.read_csv("dataset_grouped_selected_balanced.csv")

#drop unneeded columns
data = data_tmp.drop(['number_on_set', 'track_id', 'track_genre', 'track_genre_grouped'], axis = 1)

#get categorical data
cat_cols = data.select_dtypes(include = ['object', 'category']).columns.tolist()

#label encoder
encode_data = LabelEncoder()
for col in cat_cols:
    data[col] = encode_data.fit_transform(data[col])

#split data
X = data.values
y = data_tmp["track_genre_grouped"].values

#encode label
y = encode_data.fit_transform(y)
y = to_categorical(y, num_classes = len(np.unique(y)))

#split 80/20 training and testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state = 42
)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

#scale and reshape to input 1D CNN
scl = StandardScaler()
X_train = scl.fit_transform(X_train)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = scl.transform(X_test)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build model
model = Sequential([
    Conv1D(64, 1, activation = 'relu', input_shape = (X_train.shape[1], 1), padding = 'same'),
    Conv1D(128, 1, activation = 'relu', padding = 'same'),
    Conv1D(256, 1, activation = 'relu', padding = 'same'),
    Flatten(),
    Dense(128, activation = 'relu'),
    Dense(len(y[0]), activation = 'softmax')
])

model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

#use early stopping
early_stop = EarlyStopping(monitor = 'val_loss', patience = 3, restore_best_weights = True)

#train
history = model.fit(
    X_train,
    y_train,
    epochs = 100,
    batch_size = 32,
    validation_split = 0.2,
    callbacks = [early_stop]
)

#evaluate
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {test_acc:.2f}")

#predict
y_pred = np.argmax(model.predict(X_test), axis = 1)
y_true = np.argmax(y_test, axis = 1)

#print report
print(classification_report(y_true, y_pred, target_names = encode_data.classes_))

#print confusion matrix
matrix = confusion_matrix(y_true, y_pred)

plt.figure(figsize = (12, 10))

sns.heatmap(matrix, annot = True, fmt = 'd', cmap = 'Blues', xticklabels = encode_data.classes_, yticklabels = encode_data.classes_)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.xticks(rotation = 90)
plt.ylabel("True Label")
plt.yticks(rotation = 0)
plt.show()

#plot for precision, recall, F1-score
report_dict = classification_report(
    y_true, y_pred, target_names = encode_data.classes_, output_dict = True
)
report_df = pd.DataFrame(report_dict).transpose()
metrics_df = report_df.iloc[:-3][['precision', 'recall', 'f1-score']]

metrics_df.plot(kind = 'bar', figsize = (15, 7))
plt.title("Precision, Recall, & F1-Score per Class")
plt.xlabel("Class Label")
plt.ylabel("Score")
plt.ylim(0, 1)
plt.xticks(rotation = 45, ha = 'right')
plt.grid(axis = 'y')
plt.show()

#plot training acc, validation acc, loss
plt.figure(figsize = (12, 6))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label = 'Train Accuracy')
plt.plot(history.history['val_accuracy'], label = 'Validation Accuracy')
plt.title('Accuracy vs Epoch')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='best')

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label = 'Train Loss')
plt.plot(history.history['val_loss'], label = 'Validation Loss')
plt.title('Loss vs Epoch')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3960/3960[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 5ms/step - accuracy: 0.4364 - loss: 1.7671 - val_accuracy: 0.5750 - val_loss: 1.3012
Epoch 2/100
[1m3487/3960[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m2s[0m 4ms/step - accuracy: 0.5859 - loss: 1.2639

KeyboardInterrupt: 