# Audio Feature Extraction
 
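## Extract per-segment audio features (log-mel spectrogram, chroma, tempo, MFCC, spectral centroid, spectral contrast, tonnetz, RMS loudness) from each movie's raw audio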

In [1]:
import numpy as np
import pandas as pd
import librosa
import pickle

In [2]:
# Load the per-movie runtime table from CSV.
base_path = r'data/mounted/Numerical Data/'
movieRuntimePath = base_path + 'movie_runtimes.csv'

# Only the three listed columns are read; the first row of the file is the header.
movieRuntimeDf = pd.read_csv(
    movieRuntimePath,
    header=0,
    usecols=['movie', 'runtime (mins)', 'effective runtime'],
)

# Movie titles in file order; the later cells use them as dictionary keys
# and as pickle file names.
movieList = movieRuntimeDf['movie'].tolist()

In [3]:
# Load the raw audio pickle object for every movie in movieList.
# Movies without a pickle file are reported (title printed) and skipped.

sr = 22050 #sampling rate

# Directory holding one '<movie>.p' pickle per movie. Loop-invariant, so it is
# built once instead of on every iteration.
basePath = 'data//mounted//Pickle Objects//Raw Audio File Pickle Objects//'

rawAudio = dict()
for movie in movieList:
    moviePath = basePath + movie + '.p'
    try:
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserialising; only load pickle files that you produced or trust.
        # The context manager closes the file handle as soon as loading is
        # done instead of leaving it open until garbage collection.
        with open(moviePath, "rb") as audioFile:
            rawAudio[movie] = pickle.load(audioFile)
    except FileNotFoundError:
        # No raw-audio pickle exists for this movie yet: print its title.
        print(movie)

In [4]:
#extract features
# For every movie that has raw audio loaded, split the audio into
# (runtime in minutes * 2) segments and compute a set of librosa features per
# segment. Result layout:
#   audioFeatures[movie][featureName] -> list with one entry per segment

# Keys of the per-movie feature dictionary, in insertion order.
featureNames = ('logMel', 'chroma', 'tempo', 'mfcc',
                'specCentroid', 'specContrast', 'tonnetz', 'loudness')


def _extractSegmentFeatures(segment, sr):
    """Compute the librosa features for a single audio segment.

    Parameters
    ----------
    segment : one chunk of a movie's audio, as produced by np.array_split
    sr : sampling rate forwarded to the librosa calls

    Returns
    -------
    dict mapping each name in featureNames to the librosa result for this
    segment.
    """
    #loudness
    loudness = librosa.feature.rms(y=segment)

    #mel power spectrogram
    mel = librosa.feature.melspectrogram(y=segment, sr=sr)
    #convert to log scale (dB) and use peak power as a reference
    logMel = librosa.power_to_db(mel, ref=np.max)

    #chroma - pitch class information
    chroma = librosa.feature.chroma_cqt(y=segment, sr=sr)

    #estimated tempo information (the beat frame positions are not kept)
    tempo, _ = librosa.beat.beat_track(y=segment, sr=sr)

    #mfcc
    mfcc = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=40) #40 is the amount of cepstral vectors

    #spectral centroid - relates to brightness of sound
    specCentroid = librosa.feature.spectral_centroid(y=segment, sr=sr)

    #spectral contrast
    specContrast = librosa.feature.spectral_contrast(y=segment, sr=sr)

    #tonnetz - tonal centroid features
    tonnetz = librosa.feature.tonnetz(y=segment, sr=sr)

    #zero crossing rate - TODO: not implemented, no feature is computed for it

    return {
        'logMel': logMel,
        'chroma': chroma,
        'tempo': tempo,
        'mfcc': mfcc,
        'specCentroid': specCentroid,
        'specContrast': specContrast,
        'tonnetz': tonnetz,
        'loudness': loudness,
    }


audioFeatures = dict()

# enumerate gives the row position directly; movieList.index(movie) was a
# linear search per movie and returned the first match for duplicate titles.
for index, movie in enumerate(movieList):
    print(movie)
    try:
        y = rawAudio[movie]
    except KeyError:
        # No raw audio was loaded for this movie; skip it. Only KeyError is
        # caught so that real errors and Ctrl-C are no longer swallowed.
        continue

    #split original dataset y into smaller datasets that correspond to the 30s intervals
    # movieList was built from movieRuntimeDf['movie'], so row `index` of the
    # frame belongs to this movie.
    runtime = movieRuntimeDf.loc[index, 'runtime (mins)']
    intervals = runtime * 2
    if not np.isfinite(intervals) or intervals < 1:
        # A missing or zero runtime would make np.array_split raise in the
        # middle of the run; report the movie and carry on with the rest.
        print('skipping ' + str(movie) + ': invalid runtime ' + str(runtime))
        continue
    # np.array_split truncates a non-integer section count with int(); do it
    # explicitly here.
    x = np.array_split(y, int(intervals))

    featureDict = {name: list() for name in featureNames}
    for k in x:
        segmentFeatures = _extractSegmentFeatures(k, sr)
        for name in featureNames:
            featureDict[name].append(segmentFeatures[name])

    audioFeatures[movie] = featureDict


Hobbit 2
Suck Me Shakespeer
Walking with Dinosaurs
Machete Kills
Buddy
The Hunger Games-Catching Fire
Walter Mitty
Paranormal Activity
Star Wars-The Force Awakens
I'm Off Then
Help, I Shrunk My Teacher


In [7]:
#save pickle objects
#tried to save the whole audio feature dictionary but memory requirements wont allow it
#saving each movie independently

# Output directory; each movie is written to '<movie>.p' inside it.
audioFeatureDir = 'data//mounted//Pickle Objects//Audio Feature Pickle Objects//'

for movie in movieList:
    try:
        movieFeatures = audioFeatures[movie]
    except KeyError:
        #movie hasnt been added to collection yet
        print(movie)
        continue

    audioFeaturePath = audioFeatureDir + movie + '.p'
    # The context manager flushes and closes the file when the dump finishes,
    # instead of leaving that to garbage collection. The lookup above happens
    # before the file is opened, so no empty file is created for a movie
    # without features.
    with open(audioFeaturePath, "wb") as featureFile:
        pickle.dump(movieFeatures, featureFile)
    