In [1]:
import librosa
import librosa.display
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import cv2

In [2]:
# Input root: the directory walk further down treats every sub-directory of this
# folder as one category and reads the .wav files inside it.
data_dir = "generes_raw"
# Output root: process_audio writes spectrogram PNGs to <output_dir>/<category>/.
output_dir = "generes_spectrograms"

In [3]:
# Create the spectrogram output root up front; exist_ok=True keeps this cell
# safe to re-run when the folder already exists.
os.makedirs(output_dir, exist_ok=True)

In [4]:
def process_audio(file_path, category, output_dir, sr=22050):
    """Save a mel-spectrogram image for one audio file and extract scalar features.

    Loads at most the first 30 seconds of ``file_path`` at sample rate ``sr``,
    writes an axis-free mel spectrogram to
    ``<output_dir>/<category>/<file stem>.png`` and computes six summary values.

    Parameters
    ----------
    file_path : str
        Path of the audio file to process.
    category : str
        Label stored in the returned row; also the name of the sub-folder the
        spectrogram image is written to.
    output_dir : str
        Root folder for spectrogram images.
    sr : int, default 22050
        Sample rate passed to ``librosa.load``.

    Returns
    -------
    list
        ``[file name, category, spectral centroid, spectral bandwidth,
        spectral roll-off, zero crossing rate, RMS energy, temporal entropy]``.
        If anything fails, the error is printed and the six feature slots are
        ``0``; the first two slots keep the same order as on success so the row
        still lines up with the column header.
    """
    file_name = os.path.basename(file_path)
    try:
        y, sr = librosa.load(file_path, sr=sr, duration=30)

        # Mel spectrogram, converted to dB relative to the loudest bin.
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        spectrogram_dir = os.path.join(output_dir, category)
        os.makedirs(spectrogram_dir, exist_ok=True)
        output_image_path = os.path.join(
            spectrogram_dir, f"{os.path.splitext(file_name)[0]}.png"
        )

        fig = plt.figure(figsize=(4, 2))
        try:
            librosa.display.specshow(mel_spec_db, sr=sr, x_axis='time', y_axis='mel', fmax=8000, cmap='magma')
            plt.axis('off')
            plt.savefig(output_image_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even when drawing or saving fails;
            # otherwise figures pile up in memory across a long batch run.
            plt.close(fig)

        # Frequency Domain Features
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr).mean()
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr).mean()
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.85).mean()

        # Time Domain Features
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y).mean()
        rmse = librosa.feature.rms(y=y).mean()
        # Unnormalised sum over all samples: y**2 is not divided by its total,
        # so this is not a probability-based entropy. The 1e-10 guards log2(0).
        temporal_entropy = -np.sum((y**2) * np.log2(y**2 + 1e-10))

        return [
            file_name, category,
            spectral_centroid, spectral_bandwidth, spectral_rolloff,
            zero_crossing_rate, rmse, temporal_entropy
        ]
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        # Same slot order as the success path: file name first, then category.
        return [file_name, category, 0, 0, 0, 0, 0, 0]

In [5]:
# Accumulator for per-file feature rows; each entry is the list returned by
# process_audio for one .wav file.
features_list = []

In [6]:
# Start from an empty list so re-running this cell rebuilds the rows instead of
# appending a second copy of every file to the existing list.
features_list = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the resulting table reproducible between runs and machines.
for category in sorted(os.listdir(data_dir)):
    category_path = os.path.join(data_dir, category)
    if not os.path.isdir(category_path):
        continue  # only sub-directories are treated as categories

    for file in sorted(os.listdir(category_path)):
        if file.endswith(".wav"):
            file_path = os.path.join(category_path, file)
            features_list.append(process_audio(file_path, category, output_dir))

In [7]:
# Header for the feature table; the order mirrors the row layout that
# process_audio returns on success.
columns = (
    ["Audio File", "Category"]                                          # identifiers
    + ["Spectral Centroid", "Spectral Bandwidth", "Spectral Roll-Off"]  # frequency domain
    + ["Zero Crossing Rate", "RMS Energy", "Temporal Entropy"]          # time domain
)

In [8]:
# Turn the collected rows into a table, one row per audio file.
features = pd.DataFrame(data=features_list, columns=columns)

# Persist without the positional index so the CSV holds only the named columns.
features.to_csv(path_or_buf="extracted_audio_features.csv", index=False)

print("Spectrograms saved and features saved to 'extracted_audio_features.csv'")

Spectrograms saved and features saved to 'extracted_audio_features.csv'
