# Audio Feature Extraction
 
## Extract various features from audio

In [24]:
from __future__ import print_function

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("dark")
sns.set_context("talk")

import librosa.display
import librosa

import pickle

In [25]:
# Read the per-movie runtime table; the first row of the CSV holds the column names.
movieRuntimePath = 'Numerical Data//movie_runtimes.csv'
movieRuntimeDf = pd.read_csv(
    movieRuntimePath,
    header=0,
    usecols=['movie', 'runtime (mins)', 'effective runtime'],
)
# Movie titles in the same order as the rows of movieRuntimeDf.
movieList = [title for title in movieRuntimeDf['movie']]

In [26]:
# Load the pickled raw audio for every movie in movieList.
# Movies with no pickle on disk are skipped and reported instead of being
# silently dropped, so it is visible which titles are missing downstream.

sr = 22050 #sampling rate

# Loop-invariant directory that holds one '<movie>.p' pickle per movie.
basePath = 'Feature Extraction Pickle Objects//Raw Audio File Pickle Objects//'

rawAudio = dict()
missingAudio = []  # titles listed in the CSV that have no raw-audio pickle
for movie in movieList:
    moviePath = basePath + movie + '.p'
    try:
        # NOTE(security): pickle.load can execute arbitrary code while
        # unpickling; only load files that were produced by a trusted source.
        # The context manager guarantees the file handle is closed.
        with open(moviePath, "rb") as audioFile:
            rawAudio[movie] = pickle.load(audioFile)
    except FileNotFoundError:
        missingAudio.append(movie)

if missingAudio:
    print('No raw audio pickle found for:', missingAudio)

In [28]:
#extract features
# audioFeatures maps each movie title to a list of feature dicts, one per
# audio interval, in chronological order.
#
# NOTE(review): the previous version assigned audioFeatures[movie] after the
# interval loop, so every interval's features were computed but only the
# final interval's dict was kept. Code that relied on that shape can recover
# the old value with audioFeatures[movie][-1].
audioFeatures = dict()

for index, movie in enumerate(movieList):
    # Movies without loaded raw audio are skipped. This replaces the blanket
    # `except KeyError`, which could also hide unrelated KeyErrors raised
    # inside the feature-extraction code.
    if movie not in rawAudio:
        continue

    y = rawAudio[movie]

    #split original dataset y into smaller datasets that correspond to the 30s intervals
    # runtime is in minutes, so runtime * 2 is the number of 30 s intervals.
    # np.array_split yields near-equal chunks, so each chunk is only ~30 s and
    # presumes the listed runtime matches the audio length - TODO confirm.
    # movieList is built from movieRuntimeDf['movie'], so position `index`
    # is the row label under the default RangeIndex.
    runtime = movieRuntimeDf.loc[index, 'runtime (mins)']
    intervals = int(runtime * 2)  # array_split truncates scalars the same way
    x = np.array_split(y, intervals)

    movieFeatures = []
    for k in x:
        #mel power spectrogram
        mel = librosa.feature.melspectrogram(y=k, sr=sr)
        #convert to log scale (dB) and use peak power as a reference
        logMel = librosa.power_to_db(mel, ref=np.max)

        #chroma - pitch class information
        chroma = librosa.feature.chroma_cqt(y=k, sr=sr)

        #estimated tempo information (beat frame positions are not used)
        tempo, _ = librosa.beat.beat_track(y=k, sr=sr)

        #mfcc
        mfcc = librosa.feature.mfcc(y=k, sr=sr, n_mfcc=40) #40 is the amount of cepstral vectors

        #spectral centroid - relates to brightness of sound
        specCentroid = librosa.feature.spectral_centroid(y=k, sr=sr)

        #spectral contrast
        specContrast = librosa.feature.spectral_contrast(y=k, sr=sr)

        #tonnetz - tonal centroid features
        tonnetz = librosa.feature.tonnetz(y=k, sr=sr)

        movieFeatures.append({
            'logMel': logMel,
            'chroma': chroma,
            'tempo': tempo,
            'mfcc': mfcc,
            'specCentroid': specCentroid,
            'specContrast': specContrast,
            'tonnetz': tonnetz,
        })

    audioFeatures[movie] = movieFeatures

In [29]:
# Number of movies that have an entry in audioFeatures (it is keyed by movie title).
len(audioFeatures)

7

In [30]:
#save pickle objects
# Write through a context manager so the file is flushed and closed even if
# pickling fails part-way; a bare open() left the handle to the garbage collector.
with open('Feature Extraction Pickle Objects//audioFeatures.p', "wb") as featureFile:
    pickle.dump(audioFeatures, featureFile)