In [None]:
# %pip install numpy
# %pip install matplotlib
# %pip install torch

In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import random

In [3]:
dataset_path = "../../Data/features_30_sec.csv"

data = pd.read_csv(dataset_path)

spectral_features = [
    "chroma_stft_mean", "chroma_stft_var",
    "spectral_centroid_mean", "spectral_centroid_var",
    "rolloff_mean", "rolloff_var"
]
rhythmic_features = ["tempo"]
mfcc_features = [
    "mfcc1_mean", "mfcc1_var",
    "mfcc2_mean", "mfcc2_var",
    "mfcc3_mean", "mfcc3_var"
]

selected_features = spectral_features = rhythmic_features + mfcc_features

X = data[selected_features]
y = data["label"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state=42)
print(X_train)

          tempo  mfcc1_mean    mfcc1_var  mfcc2_mean    mfcc2_var  mfcc3_mean  \
82   123.046875  -29.326496  1405.848877  108.646187   254.188568  -25.548882   
991  143.554688  -74.275742   833.735596  100.125725   444.707764  -47.363968   
789   99.384014  -47.715034  7978.628906   55.567909  1223.721436   19.238752   
894  129.199219  -97.746880  2043.584351  108.831245   829.460754  -24.916161   
398  123.046875  -91.576141  1841.363159   99.457474   424.883850   -8.977487   
..          ...         ...          ...         ...          ...         ...   
106   99.384014 -295.774567  4000.331055  134.538834   307.928497  -22.429554   
270  112.347147  -99.300232  2682.421387  117.564529   569.738525  -29.862644   
860  112.347147 -301.459442  5544.246094   77.230850  1140.099731  -13.612371   
435  143.554688 -100.842010  3069.406494   64.214058  1030.421509   12.818649   
102   99.384014 -235.253723   857.942932  141.109985   197.688080  -34.670734   

      mfcc3_var  
82   268.

In [4]:
# scale/standardize data for both training and testing data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

# print the training data to confirm data standardization
print(X_train)

[[ 0.14352417  1.13800251 -0.86963696 ... -1.0421225  -0.78795286
  -0.70835186]
 [ 0.87220839  0.69833667 -1.07468124 ... -0.62056613 -1.77973356
  -0.62060565]
 [-0.69726531  0.95813715  1.48603457 ...  1.10313509  1.24823027
   0.57639908]
 ...
 [-0.2366589  -1.52383436  0.61355652 ...  0.91810775 -0.24528252
   0.18861867]
 [ 0.87220839  0.43848179 -0.27342116 ...  0.67542589  0.95635271
   0.94714132]
 [-0.69726531 -0.8762508  -1.06600538 ... -1.16713951 -1.20266025
  -1.11327342]]


In [None]:
# test different kernels, linear, polynomial, rbf, and sigmoid

svm_kernels = ["linear", "poly", "rbf", "sigmoid"]

svm_c_values = [0.1, 1, 5, 10, 50, 100]

# test different svm kernels
print("kernel: linear")
for C in svm_c_values:
    model = SVC(C=C, kernel="linear")
    model.fit(X_train, y_train)
    y_predict = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_predict)
    print(f"C: {C} accuracy: {accuracy:.3f}")

print("\nkernel: polynomial")
for C in svm_c_values:
    model = SVC(C=C, kernel="poly")
    model.fit(X_train, y_train)
    y_predict = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_predict)
    print(f"C: {C} accuracy: {accuracy:.3f}")

print("\nkernel: rbf")
for C in svm_c_values:
    model = SVC(C=C, kernel="rbf")
    model.fit(X_train, y_train)
    y_predict = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_predict)
    print(f"C: {C} accuracy: {accuracy:.3f}")

print("\nkernel: sigmoid")
for C in svm_c_values:
    model = SVC(C=C, kernel="sigmoid")
    model.fit(X_train, y_train)
    y_predict = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_predict)
    print(f"C: {C} accuracy: {accuracy:.3f}")

kernel: linear
C: 0.1 accuracy: 0.416
C: 1 accuracy: 0.440
C: 5 accuracy: 0.432
C: 10 accuracy: 0.432
C: 50 accuracy: 0.432
C: 100 accuracy: 0.432

kernel: polynomial
C: 0.1 accuracy: 0.400
C: 1 accuracy: 0.412
C: 5 accuracy: 0.424
C: 10 accuracy: 0.436
C: 50 accuracy: 0.428
C: 100 accuracy: 0.436

kernel: rbf
C: 0.1 accuracy: 0.416
C: 1 accuracy: 0.484
C: 5 accuracy: 0.504
C: 10 accuracy: 0.500
C: 50 accuracy: 0.504
C: 100 accuracy: 0.512

kernel: sigmoid
C: 0.1 accuracy: 0.348
C: 1 accuracy: 0.332
C: 5 accuracy: 0.284
C: 10 accuracy: 0.256
C: 50 accuracy: 0.228
C: 100 accuracy: 0.212


In [17]:
print("\npolynomial with different degrees")
degrees = [1, 3, 5, 10]
for C in svm_c_values:
    for degree in degrees:
        model = SVC(C=C, kernel="poly", degree=degree)
        model.fit(X_train, y_train)
        y_predict = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_predict)
        print(f"C: {C} degree: {degree} accuracy: {accuracy:.3f}")
    print("\n")


polynomial with different degrees
C: 0.1 degree: 1 accuracy: 0.400
C: 0.1 degree: 3 accuracy: 0.284
C: 0.1 degree: 5 accuracy: 0.208
C: 0.1 degree: 10 accuracy: 0.148


C: 1 degree: 1 accuracy: 0.412
C: 1 degree: 3 accuracy: 0.372
C: 1 degree: 5 accuracy: 0.296
C: 1 degree: 10 accuracy: 0.252


C: 5 degree: 1 accuracy: 0.424
C: 5 degree: 3 accuracy: 0.436
C: 5 degree: 5 accuracy: 0.364
C: 5 degree: 10 accuracy: 0.244


C: 10 degree: 1 accuracy: 0.436
C: 10 degree: 3 accuracy: 0.440
C: 10 degree: 5 accuracy: 0.392
C: 10 degree: 10 accuracy: 0.232


C: 50 degree: 1 accuracy: 0.428
C: 50 degree: 3 accuracy: 0.436
C: 50 degree: 5 accuracy: 0.428
C: 50 degree: 10 accuracy: 0.272


C: 100 degree: 1 accuracy: 0.436
C: 100 degree: 3 accuracy: 0.472
C: 100 degree: 5 accuracy: 0.416
C: 100 degree: 10 accuracy: 0.296


