In [None]:
import librosa
import librosa.display
import numpy as np
import pandas as pd
import os
from scipy.stats import skew, kurtosis

In [None]:
# Directory where audio files are stored. Only its immediate entries are
# scanned (via os.listdir below); subdirectories are not searched.
audio_dir = "./audio_samples/"  # Change this to your dataset path

# Feature extraction for a single audio file
def extract_features(file_path, sr=44100):
    """Compute a fixed set of summary audio features for one file.

    The audio is loaded with ``librosa.load`` and every frame-wise feature is
    reduced to a single number with ``np.mean``, so each file yields one flat
    record suitable for a DataFrame row.

    Args:
        file_path: Path to an audio file that ``librosa.load`` can read.
        sr: Target sample rate handed to ``librosa.load``. Defaults to 44100,
            the rate this function used before the parameter existed. Per the
            librosa documentation, ``None`` keeps the file's native rate.

    Returns:
        dict with the keys ``file`` (base name of ``file_path``),
        ``spectral_centroid``, ``spectral_bandwidth``, ``spectral_rolloff``,
        ``mfcc_1``, ``mfcc_2``, ``mfcc_3``, ``rms_energy``,
        ``zero_crossing_rate``, ``spectral_contrast``, ``skewness`` and
        ``kurtosis``.
    """
    # Rebind sr to the rate librosa actually used so the spectral features
    # below are computed against the real sample rate (matters for sr=None).
    y, sr = librosa.load(file_path, sr=sr)

    # Spectral features, each averaged over all frames
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))

    # MFCCs: 13 coefficients are computed, but only the first 3 are kept,
    # each averaged over time (axis=1 is the frame axis of the MFCC matrix).
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_mean = np.mean(mfccs[:3], axis=1)

    # RMS Energy (Loudness)
    rms_energy = np.mean(librosa.feature.rms(y=y))

    # Zero Crossing Rate (Helps detect percussive or modulated signals)
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    # Spectral Contrast (energy difference between spectral peaks and
    # valleys); the mean is taken over all bands and frames at once.
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr))

    # Skewness & Kurtosis of the raw waveform samples, i.e. the shape of the
    # time-domain amplitude distribution (not of the spectrum). scipy's
    # kurtosis defaults to the Fisher (excess) definition.
    skewness = skew(y)
    kurt = kurtosis(y)

    # Return feature dictionary
    return {
        "file": os.path.basename(file_path),
        "spectral_centroid": spectral_centroid,
        "spectral_bandwidth": spectral_bandwidth,
        "spectral_rolloff": spectral_rolloff,
        "mfcc_1": mfcc_mean[0],
        "mfcc_2": mfcc_mean[1],
        "mfcc_3": mfcc_mean[2],
        "rms_energy": rms_energy,
        "zero_crossing_rate": zero_crossing_rate,
        "spectral_contrast": spectral_contrast,
        "skewness": skewness,
        "kurtosis": kurt,
    }

# Process all audio files
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the resulting CSV reproducible across runs and machines.
feature_list = []
for file in sorted(os.listdir(audio_dir)):
    # Compare the extension case-insensitively so files such as "CLIP.WAV"
    # are not silently left out of the dataset.
    if not file.lower().endswith(".wav"):
        continue
    file_path = os.path.join(audio_dir, file)
    feature_list.append(extract_features(file_path))

# Convert to DataFrame (one row per audio file, one column per feature)
df = pd.DataFrame(feature_list)

# Save to CSV
df.to_csv("audio_features.csv", index=False)
print("Feature extraction complete. Saved as 'audio_features.csv'.")
