In [108]:
import pandas as pd
import numpy as np
import scipy.signal as ss
import math
import datetime
import pickle
import random
import copy
from sklearn.preprocessing import OneHotEncoder

In [126]:
def processSubtitles(sentiment):
    """Turn per-interval subtitle sentiment records into integer codes.

    ``sentiment`` is read by position, one entry at a time. An entry whose
    length is zero stands for an interval without dialog; every other entry
    must provide a 'sentimentValue' item that ``int`` can convert.

    Code meanings:
        0 = no dialog
        1 = negative
        2 = neutral
        3 = positive

    Returns a list with one int per entry of ``sentiment``.
    """
    def toCode(record):
        #an empty record marks an interval without dialog
        return int(record['sentimentValue']) if len(record) != 0 else 0

    return [toCode(sentiment[position]) for position in range(len(sentiment))]

def processVisuals(colour,movieRuntimeDf,movieList,movie,framesPerInterval=30):
    """Split per-frame colour samples into red, green and blue interval groups.

    Parameters
    ----------
    colour : sequence
        One entry per frame; items 0, 1 and 2 of each entry are read as the
        red, green and blue values respectively.
    movieRuntimeDf : pandas.DataFrame
        Needs an 'effective runtime' column. The row is selected by label
        using the position of ``movie`` in ``movieList``.
        NOTE(review): this presumes the frame keeps its default integer index
        and that ``movieList`` is in the same row order - confirm with callers.
    movieList : list
        List in which ``movie`` is looked up with ``list.index``.
    movie : object
        Entry of ``movieList`` identifying the movie to process.
    framesPerInterval : int, optional
        Number of consecutive frames grouped into one interval (default 30,
        the value that used to be hard-coded).

    Returns
    -------
    tuple(list, list, list)
        ``(redDf, greenDf, blueDf)``; each is a list with 'effective runtime'
        entries, every entry being the slice of ``framesPerInterval``
        consecutive values for that interval. Slices are shorter (or empty)
        when ``colour`` has fewer frames than the runtime implies.

    Raises
    ------
    ValueError
        If ``framesPerInterval`` is not positive, or ``movie`` is not in
        ``movieList``.
    """
    if framesPerInterval <= 0:
        raise ValueError('framesPerInterval must be a positive integer')

    def channel(position):
        #pull a single colour layer out of the per-frame samples
        return [colourValue[position] for colourValue in colour]

    #split into RGB layers
    red, green, blue = channel(0), channel(1), channel(2)

    #the interval count does not depend on the channel, so look it up once
    intervalCount = movieRuntimeDf.loc[movieList.index(movie), 'effective runtime']

    def groupByInterval(values):
        #group colour values as instances of framesPerInterval consecutive frames
        return [values[framesPerInterval*index:framesPerInterval*(index+1)] for index in range(intervalCount)]

    return groupByInterval(red), groupByInterval(green), groupByInterval(blue)

def processShade(shade,movieRuntimeDf,movieList,movie,framesPerInterval=30):
    """Group per-frame shade values into fixed-size intervals.

    Parameters
    ----------
    shade : sequence
        One entry per frame; only item 0 of each entry is used.
    movieRuntimeDf : pandas.DataFrame
        Needs an 'effective runtime' column. The row is selected by label
        using the position of ``movie`` in ``movieList``.
        NOTE(review): this presumes the frame keeps its default integer index
        and that ``movieList`` is in the same row order - confirm with callers.
    movieList : list
        List in which ``movie`` is looked up with ``list.index``.
    movie : object
        Entry of ``movieList`` identifying the movie to process.
    framesPerInterval : int, optional
        Number of consecutive frames grouped into one interval (default 30,
        the value that used to be hard-coded).

    Returns
    -------
    list
        'effective runtime' entries, each the slice of ``framesPerInterval``
        consecutive shade values for that interval. Slices are shorter (or
        empty) when ``shade`` has fewer frames than the runtime implies.

    Raises
    ------
    ValueError
        If ``framesPerInterval`` is not positive, or ``movie`` is not in
        ``movieList``.
    """
    if framesPerInterval <= 0:
        raise ValueError('framesPerInterval must be a positive integer')

    shadeValues = [shadeValue[0] for shadeValue in shade] #extract shade values

    #single row/column lookup instead of building the whole row first
    intervalCount = movieRuntimeDf.loc[movieList.index(movie), 'effective runtime']

    #group shade values as instances
    return [shadeValues[framesPerInterval*index:framesPerInterval*(index+1)] for index in range(intervalCount)]


def _loadMovieFeature(folder, movie):
    """Unpickle one movie's feature object from 'disk/Pickle Objects/<folder>/<movie>.p'."""
    featurePath = 'disk/Pickle Objects/' + folder + '/' + movie + '.p'
    #NOTE(review): pickle.load can run arbitrary code - only point this at trusted files
    with open(featurePath, "rb") as featureFile: #close the handle once loaded
        return pickle.load(featureFile)


def _intervalFrame(rows, prefix, framesPerInterval=30):
    """Wrap grouped per-interval values in a DataFrame with '<prefix> 1'..'<prefix> N' columns."""
    header = [prefix + ' ' + str(num) for num in range(1, framesPerInterval + 1)]
    return pd.DataFrame(rows, columns=header)


def generateFeatures():
    """Load every movie's pickled features and assemble one DataFrame per movie.

    The movie names come from the 'movie' column of
    'Numerical Data/movie_runtimes.csv'. For each movie the audio, colour,
    shade, subtitle-sentiment and ASL pickle objects are loaded, the colour,
    shade and sentiment objects are passed through ``processVisuals``,
    ``processShade`` and ``processSubtitles``, and the results are
    concatenated column-wise.

    Returns
    -------
    dict
        Maps each movie to a DataFrame with the columns 'Red 1'..'Red 30',
        'Green 1'..'Green 30', 'Blue 1'..'Blue 30', 'Shade 1'..'Shade 30',
        'ASL' and 'Sentiment'.

    Side effects: reads the CSV and pickle files listed above and prints
    'Features Loaded' when finished. Errors from missing or unreadable files
    propagate to the caller.
    """
    #import movie runtimes
    movieRuntimesPath = 'Numerical Data/movie_runtimes.csv'
    movieRuntimeDf = pd.read_csv(movieRuntimesPath, usecols = ['movie', 'runtime (mins)', 'effective runtime'])
    movieList = list(movieRuntimeDf['movie'])
    movieFeatureDict = dict() #dict contains the film features with the keys being the movies

    #import pickle objects for movies and then assemble the dataframes
    for movie in movieList:
        #load pickle feature objects
        #NOTE(review): audio is loaded but not used in the assembled frame yet
        audio = _loadMovieFeature('Audio Feature Pickle Objects', movie)
        colour = _loadMovieFeature('Colour Pickle Objects', movie)
        shade = _loadMovieFeature('Shade Pickle Objects', movie)
        sentiment = _loadMovieFeature('Subtitle Sentiment Pickle Objects', movie)
        asl = _loadMovieFeature('ASL Pickle Objects', movie)

        redDf, greenDf, blueDf = processVisuals(colour,movieRuntimeDf,movieList,movie)
        shadeDf = processShade(shade,movieRuntimeDf,movieList,movie)
        sentimentDf = processSubtitles(sentiment)

        #combine features into a singular dataframe
        featureDf = pd.concat(
            [
                _intervalFrame(redDf, 'Red'),
                _intervalFrame(greenDf, 'Green'),
                _intervalFrame(blueDf, 'Blue'),
                _intervalFrame(shadeDf, 'Shade'),
                pd.DataFrame(asl, columns=['ASL']),
                pd.DataFrame(sentimentDf, columns=['Sentiment']),
            ],
            axis=1,
        )
        movieFeatureDict[movie] = featureDf

    print('Features Loaded')

    return movieFeatureDict

### Singular Features

In [124]:
#build the per-movie feature frames (dict keyed by movie); prints 'Features Loaded' when done
movieFeatureDict = generateFeatures()

Features Loaded


### Windowed Features

In [128]:
#regenerate the per-movie feature frames (same call as the 'Singular Features' cell)
movieFeatureDict = generateFeatures()

#add windowing
#NOTE(review): both loops stop on their first pass because of the two breaks below,
#so only the first window of the first movie is built, and windowFeaturesDf is
#created but never filled - this looks like work in progress, confirm intent
for movie in list(movieFeatureDict.keys()):
    
    featureDf = movieFeatureDict[movie]
        
    windowFeaturesDf = pd.DataFrame([])
    for index in range(9, featureDf.shape[0]):
        #label-based .loc slicing includes both ends: rows index-9 .. index (10 rows)
        window = featureDf.loc[index-9:index]
        break #stop after the first window
    break #stop after the first movie
        

Features Loaded


In [129]:
#display the window left by the cell above (rows 0-9 of the first movie's feature frame)
window

Unnamed: 0,Red 1,Red 2,Red 3,Red 4,Red 5,Red 6,Red 7,Red 8,Red 9,Red 10,...,Shade 23,Shade 24,Shade 25,Shade 26,Shade 27,Shade 28,Shade 29,Shade 30,ASL,Sentiment
0,0,0,72,86,82,84,84,82,88,86,...,7,0,0,15,9,13,58,47,3,0
1,33,23,23,23,22,20,5,0,0,1,...,0,1,1,2,3,3,3,2,0,0
2,1,0,0,0,0,7,29,29,27,26,...,34,33,32,30,26,28,27,27,2,0
3,23,22,23,30,27,23,18,19,22,27,...,16,15,16,16,19,21,28,40,3,1
4,25,34,10,20,13,20,35,24,33,33,...,18,21,20,21,21,19,27,21,8,3
5,28,28,44,47,46,43,45,50,45,47,...,28,15,16,24,24,23,24,15,9,1
6,26,14,19,18,25,30,27,28,27,30,...,29,20,20,19,19,25,25,25,9,2
7,29,20,19,19,19,20,41,31,29,38,...,26,19,19,19,19,25,25,24,8,2
8,29,21,24,25,24,26,27,27,35,33,...,18,22,27,29,29,29,27,27,2,1
9,33,19,17,43,46,51,63,34,36,18,...,19,18,20,20,20,23,25,17,10,2
