# All features extracted by Librosa 


- Zero Crossing Rate, (mean and var)
- Harmonics (mean and var)
- Perceptrual (mean and var)
- Tempo
- Spectral Centroid (mean and var)
- Spectral Rolloff (mean and var)
- Spectral Bandwidth (mean and var)
- Mel-Frequency Cepstral Coefficients (20 different coefficients) (mean and var)
- Chroma (mean and var)
- rms energy (mean and var)
- lenghth of the audio file.

**Note on Librosa** 
- Librosa can open several audio formats beyond just WAV files. It uses backends like PySoundFile and audioread, which support formats such as MP3, FLAC, OGG, and more. Just ensure you have the necessary dependencies (like FFmpeg, libsndfile, etc.) installed for your system.

# Code for extracting features from one audio file 

Librosa Conversion code below ⤵  (for one audio file, as defined by the audio path) 

In [24]:
#Import
import librosa
import numpy as np
import pandas as pd
import os

general_path = '../k_means_klang/raw_data/Data'

print(list(os.listdir(f'{general_path}/genres_original/')))

# Define the path to the audio file
file_path = f'{general_path}/genres_original/jazz/jazz.00055.wav'

#------------------------------------------------------------
#Step 1:  # Step 1: Load and trim the audio file
#------------------------------------------------------------
y, sr = librosa.load(file_path) 
audio_file, _ = librosa.effects.trim(y)

#------------------------------------------------------------
#Step 2: Extract features. When relevant, calculate mean and variance
#------------------------------------------------------------

# Length of the audio file (in samples)
length = audio_file.shape[0]

# Zero Crossing Rate
zero_crossings = librosa.zero_crossings(audio_file, pad=False)
zero_crossing_rate_mean = np.mean(zero_crossings)
zero_crossing_rate_var = np.var(zero_crossings)

# Harmonics & Percussive Components (HPSS)
y_harm, y_perc = librosa.effects.hpss(audio_file)
harmony_mean = np.mean(y_harm)
harmony_var = np.var(y_harm)
perceptr_mean = np.mean(y_perc)
perceptr_var = np.var(y_perc)

#Tempo: 
tempo_value, _ = librosa.beat.beat_track(y=audio_file, sr = sr) # a 2nd variable is created by the function, but we ignore it with the space' _ '.
tempo = tempo_value.item()

# Spectral Centroid
spectral_centroids = librosa.feature.spectral_centroid(y=audio_file, sr=sr)[0]
spectral_centroid_mean = np.mean(spectral_centroids)
spectral_centroid_var = np.var(spectral_centroids)

# Spectral Rolloff
spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_file, sr=sr)[0]
rolloff_mean = np.mean(spectral_rolloff)
rolloff_var = np.var(spectral_rolloff)

# Spectral Bandwidth
bandwidth = librosa.feature.spectral_bandwidth(y=audio_file, sr=sr)
spectral_bandwidth_mean = np.mean(bandwidth)
spectral_bandwidth_var = np.var(bandwidth)

#Chroma Frequencies (short-time fourier transform): 
hop_length = 5000   #Increase or decrease hop_length to change how granular you want your data to be
chromagram = librosa.feature.chroma_stft(y=audio_file, sr=sr, hop_length=hop_length)
chroma_stft_mean = np.mean(chromagram)
chroma_stft_var = np.var(chromagram)

# RMS Energy
rms_values = librosa.feature.rms(y=audio_file)
rms_mean = np.mean(rms_values)
rms_var = np.var(rms_values)

# Mel-Frequency Cepstral Coefficients (MFCCs)
mfccs = librosa.feature.mfcc(y=audio_file, sr=sr)
mfcc_means = np.mean(mfccs, axis=1)  # Array of 20 means
mfcc_vars = np.var(mfccs, axis=1)     # Array of 20 variances


#------------------------------------------------------------
#Step 3: Create a dictionary with all the features
#------------------------------------------------------------

data = {
    'file_name': file_path.split('/')[-1],
    'length_samples': length,
    'zero_crossing_rate_mean': zero_crossing_rate_mean,
    'zero_crossing_rate_var': zero_crossing_rate_var,
    'harmony_mean': harmony_mean,
    'harmony_var': harmony_var,
    'perceptr_mean': perceptr_mean,
    'perceptr_var': perceptr_var,
    'tempo': tempo,
    'spectral_centroid_mean': spectral_centroid_mean,
    'spectral_centroid_var': spectral_centroid_var,
    'spectral_rolloff_mean': rolloff_mean,
    'spectral_rolloff_var': rolloff_var,
    'spectral_bandwidth_mean': spectral_bandwidth_mean,
    'spectral_bandwidth_var': spectral_bandwidth_var,
    'chroma_stft_mean': chroma_stft_mean,
    'chroma_stft_var': chroma_stft_var,
    'rms_mean': rms_mean,
    'rms_var': rms_var
}

# Add MFCC features as separate columns
for i in range(len(mfcc_means)):
    data[f'mfcc_mean_{i+1}'] = mfcc_means[i]
    data[f'mfcc_var_{i+1}'] = mfcc_vars[i]

# Step 4: Create a pandas DataFrame
df = pd.DataFrame([data])
df.head()



['hiphop', 'classical', 'blues', 'metal', 'jazz', 'country', 'pop', 'rock', 'disco', 'reggae']


Unnamed: 0,file_name,length_samples,zero_crossing_rate_mean,zero_crossing_rate_var,harmony_mean,harmony_var,perceptr_mean,perceptr_var,tempo,spectral_centroid_mean,...,mfcc_mean_16,mfcc_var_16,mfcc_mean_17,mfcc_var_17,mfcc_mean_18,mfcc_var_18,mfcc_mean_19,mfcc_var_19,mfcc_mean_20,mfcc_var_20
0,jazz.00055.wav,661794,0.037837,0.036405,-0.001877,0.007581,-0.000975,0.000567,71.777344,871.796537,...,1.89609,44.752277,-2.675303,36.532848,-4.598643,61.776657,-2.376516,44.482979,-3.112725,52.320126


Explanation:
- Audio Loading and Trimming: The file is loaded and trimmed using librosa.load and librosa.effects.trim.
- Feature Extraction: Various features are extracted (mean and variance where applicable) using librosa functions.
- MFCCs: The MFCC coefficients are processed along axis 1 to yield a mean and variance for each of the 20 coefficients.
- Dictionary Creation: All features are stored in a dictionary. The file name is extracted from the file path.
- DataFrame Creation: A DataFrame is created from the dictionary, resulting in a single-row DataFrame that encapsulates all the extracted features.

# Code for extracting features for multiple audio files in a folder (and subfolders)
- for various types of audio formats (see librosa note above)

**Optimized with parallell processing**  
takes appox 15 mins

In [26]:
%%time

import os
import librosa
import numpy as np
import pandas as pd
from joblib import Parallel, delayed

#defining a function to extract features 
def extract_features(file_path):
    # Load and trim the audio file
    y, sr = librosa.load(file_path)
    audio_file, _ = librosa.effects.trim(y)
    
    # Length (in samples)
    length = audio_file.shape[0]
    
    # Zero Crossing Rate
    zero_crossings = librosa.zero_crossings(audio_file, pad=False)
    zero_crossings_rate_mean = np.mean(zero_crossings)
    zero_crossings_rate_var = np.var(zero_crossings)
    
    # Harmonics & Percussive Components (HPSS)
    y_harm, y_perc = librosa.effects.hpss(audio_file)
    harmony_mean = np.mean(y_harm)
    harmony_var = np.var(y_harm)
    perceptr_mean = np.mean(y_perc)
    perceptr_var = np.var(y_perc)
    
    # Tempo
    tempo_value, _ = librosa.beat.beat_track(y=audio_file, sr=sr)
    tempo = tempo_value.item()
    
    # Spectral Centroid
    spectral_centroids = librosa.feature.spectral_centroid(y=audio_file, sr=sr)[0]
    spectral_centroid_mean = np.mean(spectral_centroids)
    spectral_centroid_var = np.var(spectral_centroids)
    
    # Spectral Rolloff
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_file, sr=sr)[0]
    rolloff_mean = np.mean(spectral_rolloff)
    rolloff_var = np.var(spectral_rolloff)
    
    # Spectral Bandwidth
    bandwidth = librosa.feature.spectral_bandwidth(y=audio_file, sr=sr)
    spectral_bandwidth_mean = np.mean(bandwidth)
    spectral_bandwidth_var = np.var(bandwidth)
    
    # Chroma Frequencies
    hop_length = 5000  # Adjust for granularity
    chromagram = librosa.feature.chroma_stft(y=audio_file, sr=sr, hop_length=hop_length)
    chroma_mean = np.mean(chromagram)
    chroma_var = np.var(chromagram)
    
    # RMS Energy
    rms_values = librosa.feature.rms(y=audio_file)
    rms_mean = np.mean(rms_values)
    rms_var = np.var(rms_values)
    
    # MFCCs (20 coefficients)
    mfccs = librosa.feature.mfcc(y=audio_file, sr=sr)
    mfcc_means = np.mean(mfccs, axis=1)
    mfcc_vars = np.var(mfccs, axis=1)
    
    # Build feature dictionary
    features = {
        'file_name': os.path.basename(file_path),
        'length_samples': length,
        'zero_crossings_rate_mean': zero_crossings_rate_mean,
        'zero_crossings_rate_var': zero_crossings_rate_var,
        'harmony_mean': harmony_mean,
        'harmony_var': harmony_var,
        'perceptr_mean': perceptr_mean,
        'perceptr_var': perceptr_var,
        'tempo': tempo,
        'spectral_centroid_mean': spectral_centroid_mean,
        'spectral_centroid_var': spectral_centroid_var,
        'rolloff_mean': rolloff_mean,
        'rolloff_var': rolloff_var,
        'spectral_bandwidth_mean': spectral_bandwidth_mean,
        'spectral_bandwidth_var': spectral_bandwidth_var,
        'chroma_mean': chroma_mean,
        'chroma_var': chroma_var,
        'rms_mean': rms_mean,
        'rms_var': rms_var
    }
    
    # Add MFCC features (20 coefficients)
    for i in range(len(mfcc_means)):
        features[f'mfcc_mean_{i+1}'] = mfcc_means[i]
        features[f'mfcc_var_{i+1}'] = mfcc_vars[i]
    
    return features

# Define main folder path containing subfolders with audio files
main_folder_path = 'raw_data/Data/genres_original'

# Collect file paths from all subfolders using os.walk()
file_paths = []
for root, dirs, files in os.walk(main_folder_path):
    for filename in files:
        if filename.lower().endswith(('.wav', '.mp3', '.flac')):
            file_paths.append(os.path.join(root, filename))

# Use joblib to process files in parallel
data_list = Parallel(n_jobs=-1)(delayed(extract_features)(fp) for fp in file_paths)

# Create a DataFrame from the list of feature dictionaries
df = pd.DataFrame(data_list)
print(df.head())

          file_name  length_samples  zero_crossings_rate_mean  \
0  hiphop.00023.wav          661794                  0.106795   
1  hiphop.00005.wav          661504                  0.124813   
2  hiphop.00038.wav          623072                  0.105351   
3  hiphop.00089.wav          661794                  0.117709   
4  hiphop.00011.wav          661794                  0.094250   

   zero_crossings_rate_var  harmony_mean  harmony_var  perceptr_mean  \
0                 0.095389     -0.000016     0.017382      -0.000355   
1                 0.109234     -0.000284     0.032475      -0.002899   
2                 0.094252      0.001348     0.039337      -0.003059   
3                 0.103853     -0.000021     0.009038      -0.000635   
4                 0.085367     -0.000037     0.020214      -0.001135   

   perceptr_var       tempo  spectral_centroid_mean  ...  mfcc_mean_16  \
0      0.010511   92.285156             2240.289987  ...      4.564087   
1      0.018903   71.777344 

In [33]:
#sort values alphabetically 
df.sort_values('file_name', ascending=True, inplace=True)


In [34]:
df.head()

Unnamed: 0,file_name,length_samples,zero_crossings_rate_mean,zero_crossings_rate_var,harmony_mean,harmony_var,perceptr_mean,perceptr_var,tempo,spectral_centroid_mean,...,mfcc_mean_16,mfcc_var_16,mfcc_mean_17,mfcc_var_17,mfcc_mean_18,mfcc_var_18,mfcc_mean_19,mfcc_var_19,mfcc_mean_20,mfcc_var_20
0,blues.00000.wav,661794,0.083154,0.07624,-4.9e-05,0.008172,-1.1e-05,0.005704,123.046875,1784.122641,...,0.751707,52.424534,-1.687854,36.535866,-0.40873,41.603168,-2.302677,55.053654,1.222467,46.941349
1,blues.00001.wav,661794,0.056119,0.052969,0.000141,0.005101,-0.00018,0.003064,67.999589,1530.261767,...,0.929294,55.337963,-0.728403,60.231407,0.296872,48.133217,-0.28243,51.106014,0.530644,45.7887
2,blues.00002.wav,661794,0.076403,0.070566,-2e-06,0.016344,-2e-05,0.007464,161.499023,1552.832481,...,2.448304,40.641678,-7.72484,47.629646,-1.819024,52.393604,-3.440457,46.643398,-2.238128,30.653151
3,blues.00003.wav,661794,0.033359,0.032246,4e-06,0.019055,-1.8e-05,0.002711,63.024009,1070.153418,...,0.773994,44.432903,-3.324069,50.218452,0.636311,37.325726,-0.615968,37.257774,-3.405046,31.965254
4,blues.00004.wav,661794,0.10158,0.091261,-1.8e-05,0.004814,-1e-05,0.003094,135.999178,1835.128513,...,-4.515863,85.995193,-5.451786,75.276741,-0.915952,53.633236,-4.408018,62.882484,-11.704385,55.190254


In [35]:
#create a csv file 
df.to_csv('features_30_sec_taitest.csv', index=False) 