# Traditional machine learning algorithms to classify audio

In [1]:
import gc
import glob
import logging
import os
import random
import re
import sys
import time
import warnings
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

# Session-wide configuration (runs once, right after the imports).

# An empty device list is exported before any heavy work starts;
# presumably this keeps CUDA-aware libraries on the CPU -- TODO confirm intent.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Ask for an ERROR threshold on the root logger. basicConfig() is a no-op
# when the root logger already has handlers attached.
logging.basicConfig(level=logging.ERROR)

# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

In [2]:
# Path to the pickled dataset dictionary. Each value is read below as a
# mapping with a 'data' array and a 'label' string.
# On Kaggle the same file lives at '/kaggle/input/train-data-npy/train_data.npy'.
DATA_PATH = 'dataset/train_data.npy'

# NOTE(security): allow_pickle=True unpickles arbitrary Python objects and can
# execute code embedded in the file -- only load files from a trusted source.
data_dict = np.load(DATA_PATH, allow_pickle=True).item()

# The dictionary keys (file ids) are not needed for training, so iterate over
# the values only.
samples = list(data_dict.values())

# Flatten each spectrogram (expected (128, 256) -> (32768,)) into one feature row.
# reshape(-1) returns a view for contiguous arrays, so the only full copy of the
# data is the one np.array makes when it stacks the rows; flatten() would add a
# second per-sample copy and raise peak memory for no benefit.
X_array = np.array([sample['data'].reshape(-1) for sample in samples])  # shape (N, 128*256)
y_array = np.array([sample['label'] for sample in samples])             # shape (N,)

print("X_array shape:", X_array.shape)   # e.g. (N, 32768)
print("y_array shape:", y_array.shape)   # e.g. (N,)
print("Example label:", y_array[0])

X_array shape: (28564, 32768)
y_array shape: (28564,)
Example label: greani1


In [3]:
# Encode the string labels as consecutive integer class ids. The fitted encoder
# is kept so predictions can be mapped back to the original label strings.
label_encoder = LabelEncoder()
label_encoder.fit(y_array)
y_encoded = label_encoder.transform(y_array)

# Hold out 20% of the samples for the final evaluation; the fixed seed makes
# the partition repeatable.
split_parts = train_test_split(
    X_array,
    y_encoded,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

## Logistic Regression (LR)

In [4]:
# One seed shared by every stochastic component in this cell so that a
# Restart & Run All reproduces the same search results.
SEED = 42

# PCA compresses the flattened spectrogram features before the linear
# classifier. With svd_solver='auto' PCA may choose a randomized solver, and
# the 'saga' solver shuffles the data, so both steps need an explicit
# random_state to be deterministic.
pipe = Pipeline([
    ('pca', PCA(random_state=SEED)),
    ('logreg', LogisticRegression(solver='saga', max_iter=10000, random_state=SEED))
])

# Search space: PCA dimensionality, inverse regularisation strength C on a
# log grid from 1e-3 to 1e3, and the penalty type. 'saga' is listed because it
# supports both 'l1' and 'l2' penalties.
param_dist = {
    'pca__n_components': [64, 128, 256],
    'logreg__C': np.logspace(-3, 3, 10),
    'logreg__penalty': ['l1', 'l2'],
    'logreg__solver': ['saga']
}

# Sample 15 candidates from the space and score each with 3-fold CV accuracy
# (45 fits in total, two running in parallel).
random_search = RandomizedSearchCV(
    pipe,
    param_distributions=param_dist,
    n_iter=15,
    scoring='accuracy',
    cv=3,
    verbose=2,
    random_state=SEED,
    n_jobs=2
)

random_search.fit(X_train, y_train)
print("Best parameters:", random_search.best_params_)

# Evaluate the refitted best pipeline on the held-out test split.
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Test Accuracy:", acc)
print(classification_report(y_test, y_pred))

Fitting 3 folds for each of 15 candidates, totalling 45 fits


KeyboardInterrupt: 

In [None]:
import joblib

# cv_results_ has one entry per *sampled* candidate (n_iter of them), so the C
# value of each candidate must be read from the results themselves. The search
# grid in param_dist has a different length and its key is 'logreg__C', not 'C'.
results = random_search.cv_results_
sampled_c = np.asarray(results['param_logreg__C'], dtype=float)
mean_scores = np.asarray(results['mean_test_score'], dtype=float)

# Markers only: candidates also differ in penalty and PCA size, so joining
# them with a line would suggest a trend along C that the data does not show.
plt.semilogx(sampled_c, mean_scores, marker='o', linestyle='none')
plt.xlabel('Regularization parameter C')
plt.ylabel('Mean cross-validated accuracy')
plt.title('Logistic Regression Hyperparameter Tuning')
plt.show()

# Persist the best pipeline (PCA + logistic regression) found by the search.
joblib.dump(random_search.best_estimator_, 'best_lr_model.pkl')

In [None]:
# Tabulate every candidate evaluated by the random search (one row per
# candidate) and write the table to disk without the index column.
cv_results = pd.DataFrame(data=random_search.cv_results_)
cv_results.to_csv(path_or_buf='search_results.csv', index=False)