# Genre classification using Essentia descriptors

In [61]:
import pandas as pd
from scipy.io import arff
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import numpy as np

## Data preprocessing

In [62]:
arff_train = arff.loadarff('GenreTrain.arff')
arff_test = arff.loadarff('GenreTest.arff')

In [63]:
df_train = pd.DataFrame(arff_train[0])
df_train.head()

Unnamed: 0,highlevel_danceability_value,highlevel_equalization_profile_value,highlevel_excitement_value,highlevel_intensity_value,highlevel_speech_music_value,highlevel_voice_instrumental_value,loudness_dynamic_complexity_dvar,loudness_dynamic_complexity_mean,loudness_dynamic_complexity_var,loudness_larm_dvar,...,tonal_chords_strength_dvar,tonal_chords_strength_mean,tonal_chords_strength_var,tonal_dissonance_dvar,tonal_dissonance_mean,tonal_dissonance_var,tonal_key_mode_value,tonal_key_strength_value,tonal_tuning_equal_tempered_deviation_value,genre
0,0.206472,0.372766,0.029087,0.093526,b'music',b'voice',-0.411385,0.024215,-0.201421,-0.532833,...,-0.255337,1.140774,-1.490395,0.494393,0.149725,-0.013634,b'minor',0.526779,-0.205793,b'blu'
1,-0.368479,0.372766,1.341977,0.093526,b'music',b'voice',-0.349921,-0.015672,-0.173045,-0.452769,...,-0.121114,0.497749,-0.901662,0.347729,-0.51764,0.184709,b'major',-0.129683,-0.667979,b'blu'
2,-0.631747,0.372766,-1.283803,-1.324366,b'speech',b'instrumental',4.235612,5.779127,3.973258,1.713613,...,-0.190639,1.408974,-0.502029,1.174496,-1.45416,2.144489,b'major',-1.022781,0.937795,b'blu'
3,-0.190116,0.372766,1.341977,0.093526,b'music',b'voice',-0.307576,0.068818,-0.216986,-0.519222,...,0.257633,0.622749,1.399033,0.791524,-0.601227,0.873673,b'major',0.074454,-0.406373,b'blu'
4,0.588278,0.372766,1.341977,0.802473,b'music',b'instrumental',-0.332032,-1.078238,0.35068,-0.765884,...,-0.309481,1.301291,-0.708195,-0.093155,0.287901,-0.644858,b'major',0.919307,-0.917709,b'blu'


In [64]:
df_test = pd.DataFrame(arff_test[0])
df_test.head()

Unnamed: 0,highlevel_danceability_value,highlevel_equalization_profile_value,highlevel_excitement_value,highlevel_intensity_value,highlevel_speech_music_value,highlevel_voice_instrumental_value,loudness_dynamic_complexity_dvar,loudness_dynamic_complexity_mean,loudness_dynamic_complexity_var,loudness_larm_dvar,...,tonal_chords_strength_dvar,tonal_chords_strength_mean,tonal_chords_strength_var,tonal_dissonance_dvar,tonal_dissonance_mean,tonal_dissonance_var,tonal_key_mode_value,tonal_key_strength_value,tonal_tuning_equal_tempered_deviation_value,genre
0,0.697263,-1.940715,0.029087,-1.324366,b'music',b'instrumental',-0.241667,-1.146509,0.325486,-0.191229,...,0.080969,-0.778015,0.863776,0.341611,0.055081,0.627751,b'major',-0.313933,0.435349,b'blu'
1,0.262506,0.372766,0.029087,1.511419,b'music',b'instrumental',0.022516,0.059654,-0.077408,-0.001814,...,0.280603,0.073804,0.560035,-0.618364,0.597166,-0.69584,b'major',0.449612,1.064124,b'blu'
2,2.287425,0.372766,1.341977,1.511419,b'music',b'instrumental',0.018952,-1.078183,0.299223,-0.76412,...,-0.073297,-1.347689,-0.498906,-0.705411,0.669616,-0.995246,b'major',-0.192094,-0.758522,b'blu'
3,0.043504,0.372766,0.029087,0.802473,b'music',b'instrumental',-0.099142,-1.303403,0.389536,-0.731556,...,0.044684,-0.822402,-0.035701,-0.121474,0.486625,-0.637245,b'major',-0.211747,0.040954,b'blu'
4,-1.16006,0.372766,-1.283803,-0.61542,b'music',b'voice',0.647357,0.200046,0.186096,0.776741,...,-0.392429,-0.32205,-0.019565,0.664354,-1.750386,1.117032,b'major',0.766112,-0.257589,b'blu'


In [65]:
df_train_encoded = df_train.copy()
categorical_cols = [col for col in df_train.columns if isinstance(df_train[col].iloc[0], bytes)]

label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df_train_encoded[col] = le.fit_transform(df_train_encoded[col])
    label_encoders[col] = le

In [66]:
df_test_encoded = df_test.copy()
categorical_cols = [col for col in df_test.columns if isinstance(df_test[col].iloc[0], bytes)]

label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df_test_encoded[col] = le.fit_transform(df_test_encoded[col])
    label_encoders[col] = le

In [67]:
X_train = df_train_encoded.drop(columns=['genre'])
y_train = np.array(df_train_encoded['genre'].tolist()).reshape(-1, 1)
encoder = OneHotEncoder(sparse_output=False)
y_train = encoder.fit_transform(y_train)

In [68]:
X_train.tail()

Unnamed: 0,highlevel_danceability_value,highlevel_equalization_profile_value,highlevel_excitement_value,highlevel_intensity_value,highlevel_speech_music_value,highlevel_voice_instrumental_value,loudness_dynamic_complexity_dvar,loudness_dynamic_complexity_mean,loudness_dynamic_complexity_var,loudness_larm_dvar,...,tonal_chords_number_rate_value,tonal_chords_strength_dvar,tonal_chords_strength_mean,tonal_chords_strength_var,tonal_dissonance_dvar,tonal_dissonance_mean,tonal_dissonance_var,tonal_key_mode_value,tonal_key_strength_value,tonal_tuning_equal_tempered_deviation_value
795,-1.427786,0.372766,-1.283803,0.802473,0,0,0.559649,-0.183165,0.622257,0.084044,...,-0.636402,-0.366172,0.318132,0.748104,-0.74567,-0.226276,-0.095535,0,1.322209,0.061931
796,-0.07854,0.372766,1.341977,0.802473,0,1,-0.10834,-0.58467,0.119356,-0.314633,...,-1.021915,-0.284281,0.096984,-0.089204,-1.394305,1.20624,-1.225032,0,0.498037,-0.269263
797,-1.045058,0.372766,-1.283803,0.093526,0,0,-0.156503,-0.975097,0.297968,-0.315637,...,-0.25089,-0.408811,-1.467413,-0.965934,-0.713245,0.178487,0.002003,0,-0.624586,-1.07998
798,-0.227043,0.372766,-1.283803,0.802473,0,0,-0.125633,-1.068999,0.27443,-0.725643,...,-0.636402,-0.329513,1.205735,-0.789754,-0.415943,0.386272,-0.764152,0,0.417567,-0.522861
799,-0.457565,0.372766,0.029087,0.802473,0,1,-0.132634,-0.496725,0.047171,-0.48381,...,-0.25089,-0.302568,-1.952925,-1.420453,-0.51102,0.323766,-0.527826,0,-0.398319,-0.424525


In [69]:
X_test = df_test_encoded.drop(columns=['genre'])
y_test = np.array(df_test_encoded['genre'].tolist()).reshape(-1, 1)
encoder = OneHotEncoder(sparse_output=False)
y_test = encoder.fit_transform(y_test)

In [70]:
X_test.head()

Unnamed: 0,highlevel_danceability_value,highlevel_equalization_profile_value,highlevel_excitement_value,highlevel_intensity_value,highlevel_speech_music_value,highlevel_voice_instrumental_value,loudness_dynamic_complexity_dvar,loudness_dynamic_complexity_mean,loudness_dynamic_complexity_var,loudness_larm_dvar,...,tonal_chords_number_rate_value,tonal_chords_strength_dvar,tonal_chords_strength_mean,tonal_chords_strength_var,tonal_dissonance_dvar,tonal_dissonance_mean,tonal_dissonance_var,tonal_key_mode_value,tonal_key_strength_value,tonal_tuning_equal_tempered_deviation_value
0,0.697263,-1.940715,0.029087,-1.324366,0,0,-0.241667,-1.146509,0.325486,-0.191229,...,1.67667,0.080969,-0.778015,0.863776,0.341611,0.055081,0.627751,0,-0.313933,0.435349
1,0.262506,0.372766,0.029087,1.511419,0,0,0.022516,0.059654,-0.077408,-0.001814,...,-0.636402,0.280603,0.073804,0.560035,-0.618364,0.597166,-0.69584,0,0.449612,1.064124
2,2.287425,0.372766,1.341977,1.511419,0,0,0.018952,-1.078183,0.299223,-0.76412,...,-1.021915,-0.073297,-1.347689,-0.498906,-0.705411,0.669616,-0.995246,0,-0.192094,-0.758522
3,0.043504,0.372766,0.029087,0.802473,0,0,-0.099142,-1.303403,0.389536,-0.731556,...,0.134622,0.044684,-0.822402,-0.035701,-0.121474,0.486625,-0.637245,0,-0.211747,0.040954
4,-1.16006,0.372766,-1.283803,-0.61542,0,1,0.647357,0.200046,0.186096,0.776741,...,-0.636402,-0.392429,-0.32205,-0.019565,0.664354,-1.750386,1.117032,0,0.766112,-0.257589


In [222]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [223]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [224]:
pca = PCA(n_components=5)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

## Logistic regression

In [72]:
accuracy = dict()

In [73]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [74]:
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train, y_train)

ValueError: y should be a 1d array, got an array of shape (800, 10) instead.

In [45]:
y_pred = lr.predict(X_test)

In [46]:
accuracy['Linear regression'] = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy['Linear regression']:.4f}")

ValueError: Classification metrics can't handle a mix of multiclass and continuous targets

## Support vector machine

In [230]:
from sklearn.svm import SVC

In [231]:
svm_clf = SVC(random_state=42)
svm_clf.fit(X_train, y_train)

In [232]:
y_pred = svm_clf.predict(X_test)

In [233]:
accuracy['Support vector machine'] = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy['Support vector machine']:.4f}")

Test Accuracy: 0.9326


## Decision tree classifier

In [234]:
from sklearn.tree import DecisionTreeClassifier

In [235]:
dt_clf = DecisionTreeClassifier(criterion='gini', max_depth=None, random_state=42)
dt_clf.fit(X_train, y_train)

In [236]:
y_pred = dt_clf.predict(X_test)

In [237]:
accuracy['Decision tree'] = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy['Decision tree']:.4f}")

Test Accuracy: 1.0000


## k-nearest neighbors

In [238]:
from sklearn.neighbors import KNeighborsClassifier

In [239]:
knn_clf = KNeighborsClassifier(n_neighbors=1, metric='euclidean')
knn_clf.fit(X_train, y_train)

In [240]:
y_pred = knn_clf.predict(X_test)

In [241]:
accuracy['k-nearest neighbors'] = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy['k-nearest neighbors']:.4f}")

Test Accuracy: 1.0000


## Artificial neural networks

In [242]:
from sklearn.neural_network import MLPClassifier

In [243]:
ann_clf = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, random_state=42)
ann_clf.fit(X_train, y_train)

In [244]:
y_pred = ann_clf.predict(X_test)

In [245]:
accuracy['Artificial neural networks'] = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy['Artificial neural networks']:.4f}")

Test Accuracy: 1.0000


## Random forest (Ensemble methods)

In [246]:
from sklearn.ensemble import RandomForestClassifier

In [247]:
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train, y_train)

In [248]:
y_pred = rf_clf.predict(X_test)

In [249]:
accuracy['Random forest'] = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy['Random forest']:.4f}")

Test Accuracy: 1.0000


## Result

In [250]:
df_accuracy = pd.DataFrame(list(accuracy.items()), columns=['Model', 'Accuracy'])
df_accuracy

Unnamed: 0,Model,Accuracy
0,Linear regression,1.0
1,Support vector machine,0.932642
2,Decision tree,1.0
3,k-nearest neighbors,1.0
4,Artificial neural networks,1.0
5,Random forest,1.0
