In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import librosa
from pydub import AudioSegment
import whisper
from urllib.request import urlopen
import io
from tqdm import tqdm
import librosa.display

from audiologic.utils import load_audio

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the per-track table and discard the two 'Unnamed' columns in one step
# (presumably index columns left behind by earlier CSV saves -- TODO confirm).
df = pd.read_csv('data/all_audio.csv').drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
df.head()

Unnamed: 0,id,file,valence,source,tempo,beat_length,beat_diff
0,2,2.mp3,3.0,0,143.554688,104,18.456311
1,3,3.mp3,3.3,0,95.703125,71,26.9
2,4,4.mp3,5.5,0,172.265625,128,15.062992
3,5,5.mp3,5.3,0,99.384014,74,26.164384
4,7,7.mp3,6.4,0,117.453835,89,21.534091


In [9]:
def get_means(arr):
    """Summarise a feature track by its average level and its average step.

    Parameters
    ----------
    arr : numpy.ndarray
        Feature values. Differences are taken with ``np.diff`` defaults,
        i.e. along the last axis.

    Returns
    -------
    list
        ``[mean of arr, mean of the first differences of arr]``.
    """
    average_level = arr.mean()
    average_step = np.diff(arr).mean()
    return [average_level, average_step]

def get_audio_features(file):
    """Extract summary spectral features from a single audio file.

    Each per-frame feature track is reduced with ``get_means`` to two numbers:
    its mean, stored under the feature name, and the mean of its
    frame-to-frame differences, stored under the same name prefixed with
    ``d_``.

    Parameters
    ----------
    file
        Forwarded unchanged to ``load_audio``, which must return an
        ``(audio, sample_rate)`` pair.

    Returns
    -------
    dict
        Maps feature name to a one-element list (presumably so the dict can
        be handed to ``pd.DataFrame`` as a single row -- TODO confirm with
        callers). Keys, in insertion order: ``centroid``, ``rolloff``,
        ``rolloff_mid``, ``contrast_<i>``, ``mfcc_<i>`` (10 coefficients) and
        ``rms``, each immediately followed by its ``d_`` counterpart.
    """
    features = {}  # feature name -> [value]

    audio, sample_rate = load_audio(file)

    def store(name, track):
        # Record the mean and mean delta of one feature track.
        level, delta = get_means(track)
        features[name] = [level]
        features[f"d_{name}"] = [delta]

    # Magnitude STFT with librosa's defaults, computed once. The centroid,
    # roll-off and contrast extractors would otherwise each rebuild this same
    # spectrogram internally from the raw signal.
    S = np.abs(librosa.stft(y=audio))

    # Spectral Centroid
    store('centroid', librosa.feature.spectral_centroid(S=S, sr=sample_rate))

    # Spectral Rolloff at the 95th and 50th percentile of spectral energy
    store('rolloff', librosa.feature.spectral_rolloff(S=S, sr=sample_rate, roll_percent=0.95))
    store('rolloff_mid', librosa.feature.spectral_rolloff(S=S, sr=sample_rate, roll_percent=0.5))

    # Spectral Contrast: one track per row of the returned matrix
    contrast = librosa.feature.spectral_contrast(S=S, sr=sample_rate)
    for i, cont in enumerate(contrast):
        store(f"contrast_{i}", cont)

    # MFCCs: computed from the signal because they need a mel power
    # spectrogram rather than the magnitude STFT above.
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=10)
    for i, mfcc in enumerate(mfccs):
        store(f"mfcc_{i}", mfcc)

    # RMS energy, taken from the time-domain signal
    store('rms', librosa.feature.rms(y=audio)[0])

    return features

In [55]:

def get_means(arr):
    """Return ``[mean, mean first difference]`` for a feature array.

    Same definition as the ``get_means`` in the earlier feature-extraction
    cell. ``arr`` must be a NumPy array; differences are taken along the last
    axis (the ``np.diff`` default).
    """
    summary = (arr.mean(), np.diff(arr).mean())
    return list(summary)

def _clip_features(audio, sample_rate, num_mfccs):
    """Build the flat feature record for one decoded clip.

    The magnitude STFT is computed once and shared by the centroid, roll-off
    and contrast extractors. Every feature track is computed once and reduced
    to its mean and the mean of its frame-to-frame differences.

    Assumes ``audio`` is a mono 1-D signal, so banded features have shape
    ``(n_bands, n_frames)`` -- TODO confirm against ``load_audio``.
    """
    record = {}

    def add_track(name, track):
        # Single-track feature: one mean column and one mean-delta column.
        record[name] = track.mean()
        record[f"d_{name}"] = np.diff(track).mean()

    def add_bands(name, bands):
        # Banded feature (one row per band): columns are numbered from 1,
        # with all band means first and all band deltas after them.
        levels = np.mean(bands, axis=1)
        deltas = np.mean(np.diff(bands), axis=1)
        record.update({f"{name}{i}": value for i, value in enumerate(levels, start=1)})
        record.update({f"d_{name}{i}": value for i, value in enumerate(deltas, start=1)})

    magnitude = np.abs(librosa.stft(audio))

    add_track('centroid', librosa.feature.spectral_centroid(S=magnitude, sr=sample_rate))
    add_track('rolloff', librosa.feature.spectral_rolloff(S=magnitude, sr=sample_rate, roll_percent=0.95))
    add_track('rolloff_mid', librosa.feature.spectral_rolloff(S=magnitude, sr=sample_rate, roll_percent=0.5))
    add_bands('contrast', librosa.feature.spectral_contrast(S=magnitude, sr=sample_rate))
    # MFCCs and RMS are taken from the signal itself, not the magnitude STFT.
    add_bands('mfcc', librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=num_mfccs))
    add_track('rms', librosa.feature.rms(y=audio)[0])

    return record


def get_audio_features(dframe, file_col, num_mfccs=15):
    """Append summary audio features for every file listed in a DataFrame.

    Files are processed one at a time: each clip is loaded, reduced to a
    small record of scalars and then released. Decoded audio and STFT
    matrices are never stored in the frame, and no feature is computed more
    than once per file.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Input table; it is copied, never modified.
    file_col : str
        Name of the column whose values are passed to ``load_audio``.
    num_mfccs : int, default 15
        Number of MFCC coefficients to extract per clip.

    Returns
    -------
    pandas.DataFrame
        Copy of ``dframe`` with these columns added, in order: ``centroid``,
        ``d_centroid``, ``rolloff``, ``d_rolloff``, ``rolloff_mid``,
        ``d_rolloff_mid``, ``contrast1..n``, ``d_contrast1..n``,
        ``mfcc1..num_mfccs``, ``d_mfcc1..num_mfccs``, ``rms``, ``d_rms``.
        An empty input frame is returned as an unchanged copy.
    """
    df = dframe.copy()

    records = []
    # tqdm is imported at the top of the notebook; one tick per audio file.
    for path in tqdm(df[file_col], total=len(df), desc='Extracting audio features'):
        audio, sample_rate = load_audio(path)
        records.append(_clip_features(audio, sample_rate, num_mfccs))

    if not records:
        # Nothing to extract; avoid indexing into an empty result list.
        return df

    features = pd.DataFrame(records, index=df.index)
    # Column-wise assignment overwrites any same-named column already in df.
    df[list(features.columns)] = features

    return df

# Extract features for every row, reading the audio paths from the 'file' column.
out_df = get_audio_features(dframe=df, file_col='file')
out_df.head()

# Interrupted after 66 minutes; the audio-loading step had not even finished.

KeyboardInterrupt: 