# All features extracted by Librosa 


- Zero Crossing Rate, (mean and var)
- Harmonics (mean and var)
- Percussive component, stored as `perceptr` (mean and var)
- Tempo
- Spectral Centroid (mean and var)
- Spectral Rolloff (mean and var)
- Spectral Bandwidth (mean and var)
- Mel-Frequency Cepstral Coefficients (20 different coefficients) (mean and var)
- Chroma (mean and var)
- rms energy (mean and var)
- Length of the audio file (in samples).

**Note on Librosa** 
- Librosa can open several audio formats beyond just WAV files. It uses backends like PySoundFile and audioread, which support formats such as MP3, FLAC, OGG, and more. Just ensure you have the necessary dependencies (like FFmpeg, libsndfile, etc.) installed for your system.

# Code for extracting features from one audio file 

Librosa Conversion code below ⤵  (for one audio file, as defined by the audio path) 

In [1]:
import os
# Show the working directory: the data paths used further down are relative,
# so they only resolve when the notebook runs from the expected folder.
print(os.getcwd())


/home/tford/code/zmokhtari89/k_means_klang/notebooks/librosa


In [4]:
#Import
import librosa
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler

# Root of the raw dataset, relative to the notebook's working directory
general_path = '../../raw_data/Data'

# TODO: when this moves into a .py module, build the path from
# os.path.dirname(__file__) with os.path.join instead of relying on the
# current working directory.

# Define the path to the audio file
file_path = f'{general_path}/genres_original/jazz/jazz.00055.wav'

#------------------------------------------------------------
# Step 1: Load and trim the audio file
#------------------------------------------------------------
y, sr = librosa.load(file_path)
# trim() also returns the kept interval, which is not needed here
audio_file, _ = librosa.effects.trim(y)

#------------------------------------------------------------
# Step 2: Extract features. When relevant, calculate mean and variance
#------------------------------------------------------------

# Length of the audio file (in samples)
length = audio_file.shape[0]

# Zero Crossing Rate: mean/variance of the per-sample crossing indicator
zero_crossings = librosa.zero_crossings(audio_file, pad=False)
zero_crossing_rate_mean = np.mean(zero_crossings)
zero_crossing_rate_var = np.var(zero_crossings)

# Harmonics & Percussive Components (HPSS)
# The percussive part is stored under the "perceptr" column names.
y_harm, y_perc = librosa.effects.hpss(audio_file)
harmony_mean = np.mean(y_harm)
harmony_var = np.var(y_harm)
perceptr_mean = np.mean(y_perc)
perceptr_var = np.var(y_perc)

# Tempo: beat_track returns a second value as well, which is discarded via `_`
tempo_value, _ = librosa.beat.beat_track(y=audio_file, sr=sr)
tempo = tempo_value.item()  # unwrap to a plain Python number

# Spectral Centroid
spectral_centroids = librosa.feature.spectral_centroid(y=audio_file, sr=sr)[0]
spectral_centroid_mean = np.mean(spectral_centroids)
spectral_centroid_var = np.var(spectral_centroids)

# Spectral Rolloff
spectral_rolloff = librosa.feature.spectral_rolloff(y=audio_file, sr=sr)[0]
rolloff_mean = np.mean(spectral_rolloff)
rolloff_var = np.var(spectral_rolloff)

# Spectral Bandwidth
bandwidth = librosa.feature.spectral_bandwidth(y=audio_file, sr=sr)
spectral_bandwidth_mean = np.mean(bandwidth)
spectral_bandwidth_var = np.var(bandwidth)

# Chroma Frequencies (short-time Fourier transform)
hop_length = 5000   # Increase or decrease hop_length to change how granular you want your data to be
chromagram = librosa.feature.chroma_stft(y=audio_file, sr=sr, hop_length=hop_length)
chroma_stft_mean = np.mean(chromagram)
chroma_stft_var = np.var(chromagram)

# RMS Energy
rms_values = librosa.feature.rms(y=audio_file)
rms_mean = np.mean(rms_values)
rms_var = np.var(rms_values)

# Mel-Frequency Cepstral Coefficients (MFCCs)
# axis=1 reduces over the second axis, leaving one value per coefficient row.
mfccs = librosa.feature.mfcc(y=audio_file, sr=sr)
mfcc_means = np.mean(mfccs, axis=1)  # Array of 20 means
mfcc_vars = np.var(mfccs, axis=1)     # Array of 20 variances


#------------------------------------------------------------
# Step 3: Create a dictionary with all the features
#------------------------------------------------------------

features = {
    'filename': os.path.basename(file_path),
    'length': length,
    'chroma_stft_mean': chroma_stft_mean,
    'chroma_stft_var': chroma_stft_var,
    'rms_mean': rms_mean,
    'rms_var': rms_var,
    'spectral_centroid_mean': spectral_centroid_mean,
    'spectral_centroid_var': spectral_centroid_var,
    'spectral_bandwidth_mean': spectral_bandwidth_mean,
    'spectral_bandwidth_var': spectral_bandwidth_var,
    'rolloff_mean': rolloff_mean,
    'rolloff_var': rolloff_var,
    'zero_crossing_rate_mean': zero_crossing_rate_mean,
    'zero_crossing_rate_var': zero_crossing_rate_var,
    'harmony_mean': harmony_mean,
    'harmony_var': harmony_var,
    'perceptr_mean': perceptr_mean,
    'perceptr_var': perceptr_var,
    'tempo': tempo,
}

# Add MFCC features as separate columns (numbered from 1)
for i, (coef_mean, coef_var) in enumerate(zip(mfcc_means, mfcc_vars), start=1):
    features[f'mfcc{i}_mean'] = coef_mean
    features[f'mfcc{i}_var'] = coef_var

#------------------------------------------------------------
# Step 4: Create a pandas DataFrame (a single row for this one file)
#------------------------------------------------------------
df = pd.DataFrame([features])






In [5]:
# Preview the one-row feature table built from the single audio file
df.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,jazz.00055.wav,661794,0.264527,0.091105,0.091462,0.001233,871.796537,50292.72163,1205.655588,27508.71612,...,1.89609,44.752277,-2.675303,36.532848,-4.598643,61.776657,-2.376516,44.482979,-3.112725,52.320126


In [7]:
import warnings

# Step 5: Drop the non-feature columns (file identifier and raw length)
X_features = df.drop(["filename", "length"], axis=1)

# Step 6: Scale with MinMax
# NOTE: MinMaxScaler is fitted on the rows it is given. With a single row
# every column has min == max, so every scaled value comes out as 0.0 and the
# result carries no information. To scale one file meaningfully, fit the
# scaler on a multi-file feature table and call only `transform` here.
if len(X_features) < 2:
    warnings.warn(
        "MinMaxScaler is being fitted on fewer than two rows; "
        "all scaled values will be 0.0.",
        stacklevel=2,
    )

# Print floats with 20 decimals (a global pandas display option)
pd.set_option('display.float_format', '{:.20f}'.format)
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X_features), columns=X_features.columns)
X_scaled

Unnamed: 0,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,rolloff_var,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Explanation:
- Audio Loading and Trimming: The file is loaded and trimmed using librosa.load and librosa.effects.trim.
- Feature Extraction: Various features are extracted (mean and variance where applicable) using librosa functions.
- MFCCs: The MFCC coefficients are processed along axis 1 to yield a mean and variance for each of the 20 coefficients.
- Dictionary Creation: All features are stored in a dictionary. The file name is extracted from the file path.
- DataFrame Creation: A DataFrame is created from the dictionary, resulting in a single-row DataFrame that encapsulates all the extracted features.

# Code for extracting features for multiple audio files in a folder (and subfolders)
- for various types of audio formats (see librosa note above)

**Optimized with parallel processing**  
takes approx. 15 minutes

In [13]:
%%time

import os
import librosa
import numpy as np
import pandas as pd
from pathlib import Path
from joblib import Parallel, delayed
from sklearn.preprocessing import MinMaxScaler

#defining a function to extract features 
def extract_features(file_path, *, chroma_hop_length=5000):
    """Extract one row of summary audio features from a single file.

    The file is loaded with ``librosa.load`` (default arguments) and passed
    through ``librosa.effects.trim``; every feature is computed on the
    trimmed signal.

    Args:
        file_path: Path of the audio file to analyse.
        chroma_hop_length: Hop length handed to
            ``librosa.feature.chroma_stft``. Defaults to 5000, the value that
            was previously hard-coded; adjust it to change the granularity of
            the chromagram.

    Returns:
        dict: ``file_name`` and ``length_samples``, followed by
        ``<feature>_mean`` / ``<feature>_var`` pairs for zero crossings,
        harmonic and percussive (``perceptr``) components, spectral centroid,
        rolloff, spectral bandwidth, chroma and RMS, a scalar ``tempo``, and
        ``mfcc_mean_<k>`` / ``mfcc_var_<k>`` for each MFCC row (k from 1).
        Keys are inserted in a fixed order so that a DataFrame built from
        several results has stable columns.
    """
    # Load and trim the audio file (the kept interval returned by trim is unused)
    y, sr = librosa.load(file_path)
    audio_file, _ = librosa.effects.trim(y)

    features = {
        'file_name': os.path.basename(file_path),
        'length_samples': audio_file.shape[0],  # length in samples
    }

    def add_stats(prefix, values):
        # Store the mean and variance over all elements of `values`.
        features[f'{prefix}_mean'] = np.mean(values)
        features[f'{prefix}_var'] = np.var(values)

    # Zero Crossing Rate
    add_stats('zero_crossings_rate', librosa.zero_crossings(audio_file, pad=False))

    # Harmonics & Percussive Components (HPSS)
    y_harm, y_perc = librosa.effects.hpss(audio_file)
    add_stats('harmony', y_harm)
    add_stats('perceptr', y_perc)

    # Tempo: only the first value returned by beat_track is used
    tempo_value, _ = librosa.beat.beat_track(y=audio_file, sr=sr)
    features['tempo'] = tempo_value.item()  # unwrap to a plain Python number

    # Spectral Centroid
    add_stats('spectral_centroid',
              librosa.feature.spectral_centroid(y=audio_file, sr=sr)[0])

    # Spectral Rolloff
    add_stats('rolloff',
              librosa.feature.spectral_rolloff(y=audio_file, sr=sr)[0])

    # Spectral Bandwidth
    add_stats('spectral_bandwidth',
              librosa.feature.spectral_bandwidth(y=audio_file, sr=sr))

    # Chroma Frequencies
    add_stats('chroma',
              librosa.feature.chroma_stft(y=audio_file, sr=sr,
                                          hop_length=chroma_hop_length))

    # RMS Energy
    add_stats('rms', librosa.feature.rms(y=audio_file))

    # MFCCs: reduce over axis 1 to get one mean/variance pair per coefficient
    # row, then emit them as separately numbered columns.
    mfccs = librosa.feature.mfcc(y=audio_file, sr=sr)
    mfcc_stats = zip(np.mean(mfccs, axis=1), np.var(mfccs, axis=1))
    for index, (coef_mean, coef_var) in enumerate(mfcc_stats, start=1):
        features[f'mfcc_mean_{index}'] = coef_mean
        features[f'mfcc_var_{index}'] = coef_var

    return features

# Folder to scan; audio files in it and in any of its subfolders are processed
main_folder_path = '../../raw_data/Data/genres_original/blues'

# Walk the tree and keep every file whose extension marks it as audio
file_paths = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(main_folder_path)
    for name in names
    if name.lower().endswith(('.wav', '.mp3', '.flac'))
]

# Run the per-file extraction in parallel with joblib
extraction_jobs = (delayed(extract_features)(path) for path in file_paths)
data_list = Parallel(n_jobs=-1)(extraction_jobs)

# One row per file, one column per feature
df = pd.DataFrame(data_list)





CPU times: user 1.47 s, sys: 240 ms, total: 1.71 s
Wall time: 1min 50s


In [20]:
# Sanity check: list the rows whose file name does not contain "blues"
# (case-insensitive; rows with a missing name count as non-matching and are listed too)
df[~df['file_name'].str.contains("blues", case=False, na=False)]


Unnamed: 0,file_name,length_samples,zero_crossings_rate_mean,zero_crossings_rate_var,harmony_mean,harmony_var,perceptr_mean,perceptr_var,tempo,spectral_centroid_mean,...,mfcc_mean_16,mfcc_var_16,mfcc_mean_17,mfcc_var_17,mfcc_mean_18,mfcc_var_18,mfcc_mean_19,mfcc_var_19,mfcc_mean_20,mfcc_var_20
43,mp3_no-copyright-music-happy-306601.mp3,3518976,0.0507619261967117,0.0481851530455113,3.242182913e-07,0.0164587199687957,3.79334524041e-05,0.0062478389590978,89.10290948275862,1270.5876990610473,...,-0.8350149989128112,95.08554840087889,1.5285056829452515,83.6214141845703,7.481857776641845,104.85186004638672,1.3674513101577759,126.39393615722656,0.42667818069458,158.3740234375


### Drop columns and Scaling: 

In [21]:
# Step 5: keep only the feature columns (drop the identifier and raw length)
X_features = df.drop(columns=["file_name", "length_samples"])

# Step 6: min-max scale each feature column
# Floats are displayed with 20 decimals (a global pandas display option).
pd.set_option('display.float_format', '{:.20f}'.format)
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(X_features)
X_scaled = pd.DataFrame(scaled_values, columns=X_features.columns)
X_scaled

Unnamed: 0,zero_crossings_rate_mean,zero_crossings_rate_var,harmony_mean,harmony_var,perceptr_mean,perceptr_var,tempo,spectral_centroid_mean,spectral_centroid_var,rolloff_mean,...,mfcc_mean_16,mfcc_var_16,mfcc_mean_17,mfcc_var_17,mfcc_mean_18,mfcc_var_18,mfcc_mean_19,mfcc_var_19,mfcc_mean_20,mfcc_var_20
0,0.14573997992362697218,0.16816807783981990720,0.98684723087806702768,0.54834978058803252043,0.85492152694560874515,0.03407501422302683380,0.66603415559772294330,0.33498302832662252282,0.27899572228676894348,0.40675773987551533395,...,0.31473838482055288557,0.49909181979265315743,0.13595630066799269109,0.57290643078083558493,0.00000000000000000000,0.46163096637930428612,0.28742082028629661883,0.59789145014232680797,0.24598283603731507796,0.47621838926739168407
1,0.77775248108784955647,0.80889128904795926100,0.98510308430906756705,0.08406989643815118707,0.81706476400576089070,0.13489963348786279673,0.73387096774193572024,0.85611976678107115646,0.09161281416107397435,0.89175095496158651720,...,0.82574790245117035692,0.29851298207769516591,0.41006995047240735186,0.21344561482251298012,0.85111411222673249988,0.09493799559140755906,0.54165495263257534830,0.16546876050220377108,0.88592030698798807098,0.36200037108868998725
2,0.25847812813693327172,0.29300612805028958263,0.98596447183429059002,0.40300496554061271404,0.83956915281249833161,0.14267797117120831207,0.60573476702508965985,0.32676285772113267125,0.33759383253123070867,0.35259022043660803725,...,0.66788734628762325141,0.56750499828219735932,0.47472942730235562214,0.58457471880347788851,0.72283507441290650863,0.40655255928013261935,0.67520355684494715032,0.21788560738202905620,0.66502105758376439670,0.30675271376503610021
3,0.49618652399333545988,0.54122030394335729486,0.98700147168814866827,0.14138242909878276365,0.85111266103243921055,0.16709171686589360806,0.07933740191804716790,0.55591149856187338507,1.00000000000000000000,0.52714051565345865225,...,0.35245633048343300020,0.28501521568128196371,0.82053482654753051584,0.30678367706580422780,0.43204130826784570285,0.08397930196219530186,0.59360271183406543294,0.05513603227118824002,0.44459918085808119992,0.12277435636539849040
4,0.68038207200587796919,0.71955694974546202669,0.98698491843286828562,0.45981460882091340636,0.85068152853215495135,0.54976858817578933003,0.38288920056100994582,0.85705308980505590100,0.19427958985837784356,1.00000000000000000000,...,0.72479889636102012851,0.18761472195203879321,0.56432207585840543640,0.23704660989521761372,0.85127842861721503276,0.14278284461351997692,0.56847324066591609082,0.24742516709150103038,0.67594505753935008041,0.27673052158807742718
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,0.35237139220333019152,0.39348166167411557925,0.98578980632620982760,0.37878960167962183458,0.85210403033385684335,0.31769365394840859418,0.09318996415770608444,0.39176263795656485200,0.37018161523938247770,0.38554133981841542944,...,0.19580238120632920129,0.34784267654764100541,0.33671452555474967738,0.21638195235789040027,0.22984803885374988086,0.06234524372136437953,0.57856343938458187903,0.11278994961046728829,0.50573804201844629880,0.28462712791453970151
97,0.82649460318117373703,0.85232771159167630692,0.98724707406101108731,0.24237433763426943778,0.84816452940279385420,0.60725129253692222697,0.34946236559139787214,0.73372785448814303866,0.15891722564027238596,0.71468276012706455358,...,0.73177728970891930960,0.09486994309742921572,0.59157252051894482214,0.14658048033012963107,0.66151423845275048041,0.16098734753640697726,0.51513321972009695759,0.15277773795206911478,0.72440808433702974511,0.07897698977927111730
98,0.36385838912978235582,0.40555587337805931725,1.00000000000000000000,0.16975663627796133692,0.93488906072289279514,0.13140377347353998649,0.23963133640553002657,0.42659030410476317385,0.14398498112229074497,0.46370801995745025703,...,0.45893476696299018158,0.32317697638742681754,0.64091143256735694234,0.17003738866797715734,0.40181659307868977660,0.19678581478748158329,0.58540590631960154333,0.34003725844849064242,0.64826690443249213036,0.43580782941406648323
99,0.43964152290672769974,0.48402162596757447588,0.98677554439730819080,0.30259080811430671965,0.84981190353054114617,0.66782298615769175587,0.55178268251273365763,0.60580754689868765439,0.06126285302401981991,0.67027554146020973391,...,0.65132424154964130292,0.05694222058839654155,0.45416501484380689124,0.07605521898195996555,0.55103291304400625616,0.15857566413226825697,0.59593006067468323739,0.07303589798766757513,0.77619746817496049207,0.18609148341136419469


In [23]:
# Show the first 44 scaled rows
# NOTE(review): presumably 44 was chosen so that row 43 (the non-blues MP3
# found by the filter cell) is included; confirm.
X_scaled.head(44)

Unnamed: 0,zero_crossings_rate_mean,zero_crossings_rate_var,harmony_mean,harmony_var,perceptr_mean,perceptr_var,tempo,spectral_centroid_mean,spectral_centroid_var,rolloff_mean,...,mfcc_mean_16,mfcc_var_16,mfcc_mean_17,mfcc_var_17,mfcc_mean_18,mfcc_var_18,mfcc_mean_19,mfcc_var_19,mfcc_mean_20,mfcc_var_20
0,0.1457399799236269,0.1681680778398199,0.986847230878067,0.5483497805880325,0.8549215269456087,0.0340750142230268,0.6660341555977229,0.3349830283266225,0.2789957222867689,0.4067577398755153,...,0.3147383848205528,0.4990918197926531,0.1359563006679926,0.5729064307808355,0.0,0.4616309663793042,0.2874208202862966,0.5978914501423268,0.245982836037315,0.4762183892673916
1,0.7777524810878494,0.8088912890479591,0.9851030843090676,0.0840698964381511,0.8170647640057608,0.1348996334878627,0.7338709677419357,0.856119766781071,0.0916128141610739,0.8917509549615865,...,0.8257479024511702,0.2985129820776951,0.4100699504724073,0.2134456148225129,0.8511141122267324,0.0949379955914075,0.5416549526325753,0.1654687605022037,0.885920306987988,0.3620003710886899
2,0.2584781281369332,0.2930061280502895,0.9859644718342904,0.4030049655406127,0.8395691528124983,0.1426779711712083,0.6057347670250895,0.3267628577211326,0.3375938325312307,0.352590220436608,...,0.6678873462876233,0.5675049982821972,0.4747294273023556,0.5845747188034778,0.7228350744129065,0.4065525592801326,0.6752035568449472,0.217885607382029,0.6650210575837643,0.3067527137650361
3,0.4961865239933354,0.5412203039433572,0.9870014716881486,0.1413824290987827,0.8511126610324392,0.1670917168658936,0.0793374019180471,0.5559114985618733,1.0,0.5271405156534587,...,0.352456330483433,0.2850152156812819,0.8205348265475305,0.3067836770658042,0.4320413082678457,0.0839793019621953,0.5936027118340654,0.0551360322711882,0.4445991808580811,0.1227743563653984
4,0.6803820720058779,0.719556949745462,0.9869849184328682,0.4598146088209134,0.850681528532155,0.5497685881757893,0.3828892005610099,0.8570530898050559,0.1942795898583778,1.0,...,0.7247988963610201,0.1876147219520387,0.5643220758584054,0.2370466098952176,0.851278428617215,0.1427828446135199,0.568473240665916,0.247425167091501,0.67594505753935,0.2767305215880774
5,0.3744554025105814,0.416652447123434,0.986658345416398,0.3630343254334695,0.8521031259721524,0.4015128063167454,0.7338709677419357,0.4329000119010577,0.0708732612106672,0.4364548523743759,...,0.6318931805045437,0.0971116550853821,0.506905588832206,0.1444591232703373,0.4790216950275786,0.1350510900274566,0.5785240512978282,0.1023060751422117,0.6654802770955341,0.0170631135997477
6,0.1211515973134915,0.1403323995995457,0.9867098570214452,0.6548463691374244,0.8555285658893749,0.0723160664271964,0.6660341555977229,0.3563166362126756,0.4902493360509941,0.4309202888705053,...,0.0056743125682329,0.3625360196782422,0.1020249057672472,0.3794031559498564,0.0992538876860876,0.3656153152930111,0.3230923636351383,0.1620686212598343,0.0788555626603638,0.4251118164941015
7,0.910401423973673,0.9250960213930932,0.9735338171927727,0.0698563144637458,0.803447646773453,0.1475876346243096,0.23963133640553,1.0,0.2123055668946676,0.9877407182549442,...,0.6884820735562013,0.1925866251917552,0.1443742654460972,0.2920875365621928,0.9185429603667572,0.2814814987879247,0.3952528767357423,0.3418116055739565,0.6744691306385651,0.3293720697154979
8,0.5220788359843113,0.5670274192440266,0.713588332654124,0.1511843297944682,0.3622037425495724,0.096011696104924,0.2169076751946608,0.7628106714346627,0.1226011997260372,0.8795174713683513,...,0.9029387888862954,0.3398911238685035,0.700722285888263,0.2964721297547113,0.2839350748700464,0.3593387264471588,0.6156367756256631,0.9128198079307644,1.0,0.5477254228016134
9,0.2057000341505313,0.2351335786200831,0.98625558756223,0.105674671494338,0.8544329429284026,0.0819236539433236,0.23963133640553,0.2374805381858191,0.0519711621979922,0.2714051565345858,...,0.5398941683515472,0.4500201576535792,0.7679605915207861,0.4508233151352326,0.4371911968157574,0.2631529719301656,0.7391058956494768,0.3049258731026648,0.7297662439854855,0.2672739650168606


In [35]:
#create a csv file 
# df.to_csv('features_30_sec_taitest.csv', index=False) 