# Traditional machine learning algorithms to classify audio

In [2]:
import time
from concurrent.futures import ThreadPoolExecutor
import glob
import librosa
import numpy as np
import os
import pandas as pd
import re
import sys
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm
import torchaudio
import torchaudio.transforms as AT
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy import signal
import gc
import warnings
import logging
from pathlib import Path
import random
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline


# Keep cell output quiet: the root logger only reports ERROR and above,
# and every Python warning is suppressed for the rest of the session.
logging.basicConfig(level=logging.ERROR)
warnings.filterwarnings("ignore")

# CUDA_VISIBLE_DEVICES is set to an empty string (presumably to force
# CPU-only execution) and PyTorch is limited to 4 threads.
os.environ.update({'CUDA_VISIBLE_DEVICES': ''})
torch.set_num_threads(4)

In [4]:
# Load the pre-computed training set. The .npy file holds a single pickled
# dict (hence allow_pickle=True followed by .item()); every value carries a
# 'data' array and a 'label' string.
# NOTE(review): allow_pickle=True unpickles arbitrary objects, so this is only
# safe for files from a trusted source.
data_dict = np.load('/kaggle/input/train-data-npy/train_data.npy', allow_pickle=True).item()

# One flattened feature vector and one label per entry, both in dict order so
# that row i of X_array matches element i of y_array.
X_list = [content['data'].flatten() for content in data_dict.values()]
y_list = [content['label'] for content in data_dict.values()]

# Stack into a 2-D feature matrix and a 1-D label vector.
X_array = np.asarray(X_list)
y_array = np.asarray(y_list)

print("X_array shape:", X_array.shape)
print("y_array shape:", y_array.shape)
print("Example label:", y_array[0])

X_array shape: (28564, 32768)
y_array shape: (28564,)
Example label: greani1


In [7]:
# Encode the string labels as integer class ids; label_encoder keeps the
# mapping so predictions can be converted back with inverse_transform.
label_encoder = LabelEncoder().fit(y_array)
y_encoded = label_encoder.transform(y_array)

# Hold out 20% of the samples as a test set; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_array,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

## Logistic Regression (LR)

In [None]:
# Pipeline: PCA for dimensionality reduction, then logistic regression on the
# projected features.
# Both estimators are seeded. The 'saga' solver shuffles the data and PCA may
# select a randomized SVD solver, so without a seed repeated runs produce
# different scores even though the split and the search already use 42.
pipe = Pipeline([
    ('pca', PCA(random_state=42)),
    ('logreg', LogisticRegression(solver='saga', max_iter=1000, random_state=42))
])

# Search space sampled by RandomizedSearchCV. 'saga' is listed explicitly
# because it is the solver used here with both the 'l1' and 'l2' penalties.
param_dist = {
    'pca__n_components': [128, 256, 512, 1024],
    'logreg__C': np.logspace(-3, 3, 10),
    'logreg__penalty': ['l1', 'l2'],
    'logreg__solver': ['saga']
}

# 15 sampled candidates x 3-fold CV = 45 fits, scored by accuracy.
random_search = RandomizedSearchCV(
    pipe,
    param_distributions=param_dist,
    n_iter=15,
    scoring='accuracy',
    cv=3,
    verbose=2,
    random_state=42
)

random_search.fit(X_train, y_train)
print("Best parameters:", random_search.best_params_)

# Evaluate the best pipeline (refit on the full training split) on the
# held-out test set.
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Test Accuracy:", acc)
print(classification_report(y_test, y_pred))

Fitting 3 folds for each of 15 candidates, totalling 45 fits


In [None]:
# Plot mean CV accuracy against the C value of each sampled candidate.
# The C values come from cv_results_['params'] rather than from the search
# grid: the grid key is 'logreg__C' (not 'C'), and the search evaluates
# n_iter sampled candidates, so the grid and the score array differ in length.
results = random_search.cv_results_
sampled_C = [params['logreg__C'] for params in results['params']]

# Markers only: candidates also differ in penalty and PCA size, so several
# points can share the same C and a connecting line would be misleading.
plt.semilogx(sampled_C, results['mean_test_score'], 'o')
plt.xlabel('Regularization parameter C')
plt.ylabel('Mean cross-validated accuracy')
plt.title('Logistic Regression Hyperparameter Tuning')
plt.show()