In [1]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn import metrics
from sklearn.model_selection import LeaveOneOut, train_test_split, KFold
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
import seaborn as sns

In [2]:
# This serves to transpose the dataframe so that each frequency is a feature and each audio file is an entry
def format_df(df, class_label):
    new_df_dict = {}
    frequencies = df["Frequency(Hz)"].unique()
    for frequency in frequencies:
        if not np.isnan(frequency):
            new_df_dict[frequency] = []
            temp_df = df.loc[df["Frequency(Hz)"] == frequency].reset_index()
            for col in temp_df:
                value_list = temp_df[col].values
                val = float(value_list[0])
                new_df_dict[frequency].append(val)
    new_df = pd.DataFrame(new_df_dict)
    new_df["Class"] = class_label
    return new_df

In [3]:
def plot_confusion_matrix(y_true, y_pred, model_name, class_labels):
    # Calculate confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    
    # Plot confusion matrix
    plt.figure()
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title(f'Confusion Matrix for {model_name}')
    plt.show()

In [4]:
def run_models(df):

    # Prep features and target
    features = df.drop(columns=["Class"])
    feature_names = features.columns
    target = df.Class
    class_labels = np.unique(target)
    cv = LeaveOneOut()

    # SVM
    svm = SVC(kernel="linear")
    actual_class = []
    predicted_class = []
    
    for train_index, test_index in cv.split(features):

        # Splitting into training and testing
        X_train, X_test = features.take(train_index), features.take(test_index)
        y_train, y_test = target.take(train_index), target.take(test_index)

        svm = svm.fit(X_train, y_train)
        y_pred = svm.predict(X_test)

        actual_class.append(y_test.item())
        predicted_class.append(y_pred[0])

    print(f"SVM Accuracy: {accuracy_score(actual_class, predicted_class)*100:.2f}%")
    plot_confusion_matrix(actual_class, predicted_class, "SVM", class_labels)

    # Random Forest
    rf = RandomForestClassifier()
    actual_class = []
    predicted_class = []
    
    for train_index, test_index in cv.split(features):
        
        # Splitting into training and testing
        X_train, X_test = features.take(train_index), features.take(test_index)
        y_train, y_test = target.take(train_index), target.take(test_index)

        rf.fit(X_train, y_train)
        y_pred = rf.predict(X_test)

        actual_class.append(y_test.item())
        predicted_class.append(y_pred[0])

    print(f"RF Accuracy: {accuracy_score(actual_class, predicted_class)*100:.2f}%")
    plot_confusion_matrix(actual_class, predicted_class, "RF", class_labels)

    # XGBoost 
    xgb = GradientBoostingClassifier()
    actual_class = []
    predicted_class = []
    
    for train_index, test_index in cv.split(features):
        
        # Splitting into training and testing
        X_train, X_test = features.take(train_index), features.take(test_index)
        y_train, y_test = target.take(train_index), target.take(test_index)

        xgb.fit(X_train, y_train)
        y_pred = xgb.predict(X_test)

        actual_class.append(y_test.item())
        predicted_class.append(y_pred[0])

    print(f"XGBoost Accuracy: {accuracy_score(actual_class, predicted_class)*100:.2f}%")
    plot_confusion_matrix(actual_class, predicted_class, "XGBoost", class_labels)

In [5]:
fft_files = glob.glob("../Soheyl_Codes/Data/*")
df_list = []
for file in fft_files:
    df = pd.read_csv(file)
    class_label = file.split('/')[-1].split('.')[0]
    new_df = format_df(df, class_label)
    print(new_df)
    break
    df_list.append(new_df)

# all_classes_df = pd.concat(df_list, ignore_index=True).dropna()
# run_models(all_classes_df)

    0.0  15.625    31.25   46.875     62.5   78.125    93.75   109.375  \
0   0.0   1.000   2.0000   3.0000   4.0000   5.0000   6.0000    7.0000   
1   0.0  15.625  31.2500  46.8750  62.5000  78.1250  93.7500  109.3750   
2   0.0   0.000  61.1685  48.1984  51.6137  49.6714  52.2828   41.6394   
3   0.0   0.000  84.1489  73.7619  64.3837  76.6512  59.0365   49.3050   
4   0.0   0.000  52.6081  51.4466  51.6991  40.3641  59.0093   29.1792   
5   0.0   0.000  69.1186  50.2122  39.5148  55.1570  34.2372   55.4536   
6   0.0   0.000  46.8842  66.0305  38.4985  46.8529  33.7761   55.3225   
7   0.0   0.000  82.3337  79.0651  54.1015  79.4923  38.0339   68.9380   
8   0.0   0.000  82.8838  71.0699  66.0605  68.8566  65.1396   43.7217   
9   0.0   0.000  74.0737  67.4139  69.9484  70.5283  52.0391   60.6414   
10  0.0   0.000  76.4999  68.9072  66.5104  68.1486  53.2844   49.8953   
11  0.0   0.000  80.0200  61.2213  65.8027  73.1588  63.3765   54.8618   
12  0.0   0.000  68.2431  79.7225  68.