In [5]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression as LR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
mfcc_train_data = pd.read_csv('train_data_features.csv').drop(columns = ['Number', 'Labels'])
print(f"The shape of train data is : {mfcc_train_data.shape}")
df_test = pd.read_csv('test_data_features.csv').drop(columns = ['Unnamed: 0'])
print(f"The shape of test data is : {df_test.shape}")

The shape of train data is : (60, 470)
The shape of test data is : (116, 470)


In [7]:
mfcc_train_data.head()

Unnamed: 0,mfcc0_mean,mfcc0_std,mfcc0_median,mfcc0_delta_mean,mfcc0_delta_std,mfcc0_delta_max,mfcc0_delta2_mean,mfcc0_delta2_std,mfcc0_delta2_max,mfcc0_delta_zero_crossings,...,total_dynamics,dynamics_std,total_acceleration,acceleration_std,silence_ratio,mfcc0_temporal_variation,mfcc4_temporal_variation,mfcc8_temporal_variation,mfcc12_temporal_variation,mfcc16_temporal_variation
0,0.965574,-0.846333,0.773404,1.109851,-0.818847,-0.363436,-0.957825,-0.848768,-0.691208,0.459456,...,-0.343208,-0.597213,-0.252168,-0.529969,0,-0.445369,0.882488,0.172927,0.154245,-0.891079
1,1.221782,-0.232754,1.059795,-0.227035,-0.252809,0.817362,0.230274,0.452816,-0.022538,0.663393,...,0.427542,0.155486,0.383529,0.059388,0,-0.406542,-1.290325,-0.15962,-0.124931,1.129333
2,-1.497383,-1.375957,-1.506125,1.23766,-0.927007,0.224326,-0.624692,-0.870208,0.196343,0.933975,...,-0.437609,-0.605007,-0.17537,-0.410911,0,-0.671704,-0.349759,-0.801197,-0.854113,-0.218762
3,0.15759,-0.416727,0.041109,1.095874,0.180522,-0.134155,-1.004388,0.197563,0.07127,0.940639,...,1.071605,0.785732,0.849882,0.527501,0,-0.349795,-0.266917,0.233113,-0.375429,0.789002
4,0.53266,-0.754534,0.385636,-0.256173,0.364987,-0.755823,0.21947,0.352273,-0.478024,1.136578,...,0.450826,0.339385,0.655725,0.485914,0,-1.040128,-1.540941,-1.129538,-0.884178,-0.836787


In [8]:
df_test.head()

Unnamed: 0,mfcc0_mean,mfcc0_std,mfcc0_median,mfcc0_delta_mean,mfcc0_delta_std,mfcc0_delta_max,mfcc0_delta2_mean,mfcc0_delta2_std,mfcc0_delta2_max,mfcc0_delta_zero_crossings,...,total_dynamics,dynamics_std,total_acceleration,acceleration_std,silence_ratio,mfcc0_temporal_variation,mfcc4_temporal_variation,mfcc8_temporal_variation,mfcc12_temporal_variation,mfcc16_temporal_variation
0,-0.292526,0.365549,-0.187034,0.005553,-0.717463,-1.103212,0.807582,-0.772851,-1.024387,-1.566001,...,-0.852446,-0.611122,-0.767106,-0.522607,-0.09325,1.031721,0.454313,0.97713,0.22381,0.142211
1,-0.253442,1.062163,-0.271157,0.188874,-0.99776,-0.206293,-0.075196,-1.003867,-0.468719,-1.558245,...,-1.584046,-1.405211,-1.578296,-1.450309,-0.09325,1.779221,1.669138,2.591698,2.045996,0.321278
2,1.532276,1.602907,1.627084,0.051644,2.174294,0.746266,-0.229681,1.710536,1.435471,0.487802,...,2.487848,2.819163,2.572681,2.327128,-0.09325,1.208683,1.524453,0.333363,0.413361,-0.798181
3,-0.553325,1.136299,-0.170002,0.051636,-0.246914,0.529529,-0.207762,-0.399915,0.566803,0.217892,...,1.053791,0.811415,1.249023,1.010389,-0.09325,0.979882,2.497061,2.367395,1.078909,1.271133
4,-0.637711,-1.646214,-0.750066,0.200493,-0.5322,-1.337644,1.676917,-0.310859,-1.06402,0.050361,...,-0.539268,-0.656143,-0.264083,-0.467822,-0.09325,-1.113713,-0.497402,-0.17833,0.089863,-1.100175


In [9]:
# Separate features (X) and labels (y)
X = mfcc_train_data.iloc[:, :].values  # Features
# y = mfcc_train_data.iloc[:, 0].values  # Labels
y = [1]*10 + [2]*10 + [3]*10 + [4]*10 + [5]*10 + [6]*10

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier
LR_model = LR()

# Train the SVM model
LR_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = LR_model.predict(X_test)

# Calculate performance metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
confusion_mat = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("---------------------")

Accuracy: 0.6666666666666666
Precision: 0.7638888888888888
Recall: 0.6666666666666666
F1-score: 0.6944444444444443
---------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
file_name = []
for a in range(116):
    file_name.append(f'{a+1}-MFCC')

y_actual_pred = LR_model.predict(df_test)
print(y_actual_pred)

str_labels = ['Asha Bhosale', 'Bhaav Geet', 'Michael Jackson', 'National Anthem', 'Kishor Kumar', 'Marathi Lavni']
str_preds = []
for label in y_actual_pred:
    str_preds.append(str_labels[label - 1])

dict_final = {'Files': file_name, 'Predictions': str_preds}
df_final = pd.DataFrame(dict_final)
df_final.head()

[4 4 3 1 2 1 5 3 2 6 1 1 1 5 1 4 4 5 1 3 1 1 1 6 5 2 4 1 2 1 4 1 1 3 4 1 5
 2 1 2 6 6 1 3 3 5 6 6 6 2 5 1 3 6 3 1 5 5 5 1 4 6 5 6 5 2 5 1 3 6 1 1 6 3
 4 6 1 3 1 2 4 2 5 5 1 3 4 3 6 4 1 1 5 6 4 2 5 3 6 2 5 2 3 1 2 1 4 4 6 1 2
 6 6 3 2 4]




Unnamed: 0,Files,Predictions
0,1-MFCC,National Anthem
1,2-MFCC,National Anthem
2,3-MFCC,Michael Jackson
3,4-MFCC,Asha Bhosale
4,5-MFCC,Bhaav Geet


In [11]:
from collections import Counter

count_dict = Counter(str_preds)
count_dict

Counter({'Asha Bhosale': 31,
         'Marathi Lavni': 19,
         'Kishor Kumar': 18,
         'National Anthem': 16,
         'Michael Jackson': 16,
         'Bhaav Geet': 16})