In [10]:
import pandas as pd
import numpy as np
import pickle
import os

## Features

In [2]:
#load the pickled feature objects of every movie and assemble the feature dataframes
def _loadFeaturePickle(featureFolder, movie):
    """Load one movie's pickled feature object from ``featureFolder``.

    ``featureFolder`` is the sub-folder name below
    'data/mounted/Pickle Objects/'; the file read is '<movie>.p'.
    The file is opened in a ``with`` block so the handle is closed as soon
    as the object has been read.
    """
    featurePath = 'data/mounted/Pickle Objects/' + featureFolder + '/' + movie + '.p'
    #NOTE(review): pickle.load can run arbitrary code - only load trusted files
    with open(featurePath, "rb") as featureFile:
        return pickle.load(featureFile)


def _numberedHeader(prefix, count):
    """Return the column names '<prefix> 1' ... '<prefix> <count>'."""
    return [prefix + ' ' + str(num) for num in range(1, count + 1)]


def generateFeatures(movieList, movieRuntimeDf, movieFeatureDict=None):
    """Build one feature dataframe per movie from its pickled feature objects.

    Parameters
    ----------
    movieList : list
        Movie names. The position of a movie in this list is used as the
        row label when looking up its runtime in ``movieRuntimeDf``.
    movieRuntimeDf : pandas.DataFrame
        Must provide an 'effective runtime' column.
    movieFeatureDict : dict, optional
        Existing dict to add the results to. A new dict is created when it
        is omitted, so the function no longer depends on a module-level
        ``movieFeatureDict`` having been created beforehand.

    Returns
    -------
    dict
        Maps each movie name to its feature dataframe (colour, shade, ASL,
        sentiment and audio columns side by side; rows containing any NaN
        are dropped).
    """
    if movieFeatureDict is None:
        movieFeatureDict = dict()

    #number of per-interval columns created for each colour/shade channel;
    #matches the interval size of 30 used by processVisuals/processShade
    framesPerInterval = 30

    for movie in movieList:
        runtime = movieRuntimeDf['effective runtime'].loc[movieList.index(movie)]

        #load pickle feature objects
        audio = _loadFeaturePickle('Audio Feature Pickle Objects', movie)
        colour = _loadFeaturePickle('Colour Pickle Objects', movie)
        shade = _loadFeaturePickle('Shade Pickle Objects', movie)
        sentiment = _loadFeaturePickle('Subtitle Sentiment Pickle Objects', movie)
        asl = _loadFeaturePickle('ASL Pickle Objects', movie)

        audioDf = processAudio(runtime, audio)
        redRows, greenRows, blueRows = processVisuals(colour,movieRuntimeDf,movieList,movie)
        shadeRows = processShade(shade,movieRuntimeDf,movieList,movie)
        sentimentValues = processSubtitles(sentiment)

        #combine features into a singular dataframe
        redDf = pd.DataFrame(redRows, columns=_numberedHeader('Red', framesPerInterval))
        greenDf = pd.DataFrame(greenRows, columns=_numberedHeader('Green', framesPerInterval))
        blueDf = pd.DataFrame(blueRows, columns=_numberedHeader('Blue', framesPerInterval))
        shadeDf = pd.DataFrame(shadeRows, columns=_numberedHeader('Shade', framesPerInterval))
        #the ASL object is used as loaded - presumably one value per interval; TODO confirm
        aslDf = pd.DataFrame(asl, columns=['ASL'])
        sentimentDf = pd.DataFrame(sentimentValues, columns=['Sentiment'])

        #align on the row index and keep only rows where every feature is present
        featureDf = pd.concat([redDf,greenDf,blueDf,shadeDf,aslDf,sentimentDf,audioDf], axis=1).dropna()
        movieFeatureDict[movie] = featureDf

    return movieFeatureDict

In [3]:
def processAudio(runtime, audio):
    """Assemble the audio features of one movie into a single dataframe.

    Parameters
    ----------
    runtime : int
        Number of leading entries kept from every feature sequence.
    audio : dict
        Maps a feature name to a sliceable sequence of entries. Entries of
        the 'tempo' key fill one column named 'tempo'. Entries of any other
        key are themselves sequences and are spread over columns named
        '<key>1' ... '<key>N', with N taken from the first entry.
        The dict is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per kept entry, the columns of all features side by side.
        Empty when ``audio`` has no keys.
    """
    featureFrames = []
    for key, values in audio.items():
        #slice into a local list so the caller's dict keeps its full sequences
        rows = list(values[:runtime])

        #create header
        if key != 'tempo':
            width = len(rows[0]) if rows else 0
            header = [key + str(x) for x in range(1, width + 1)]
        else:
            header = ['tempo']

        #build the feature dataframe in one step instead of row by row
        featureFrames.append(pd.DataFrame(rows, columns=header))

    if not featureFrames:
        return pd.DataFrame()

    #place the per-feature dataframes side by side
    return pd.concat(featureFrames, axis=1)

def processSubtitles(sentiment):
    """Turn the per-interval subtitle sentiment records into a list of ints.

    ``sentiment`` is read by position (0 .. len(sentiment)-1). An empty
    record yields 0; otherwise the record's 'sentimentValue' is converted
    to int.

    Value coding as noted by the original author:
    0 = no dialog, 1 = negative, 2 = neutral, 3 = positive
    """
    def _toValue(record):
        #an empty record means there was no dialog in that interval
        return int(record['sentimentValue']) if len(record) else 0

    #positional access is kept on purpose: the container is indexed, not iterated
    return [_toValue(sentiment[position]) for position in range(len(sentiment))]

def processVisuals(colour,movieRuntimeDf,movieList,movie,framesPerInterval=30):
    """Group the colour values of one movie into fixed-size intervals per channel.

    Parameters
    ----------
    colour : sequence
        One entry per frame; positions 0, 1 and 2 of each entry are read as
        the red, green and blue value.
    movieRuntimeDf : pandas.DataFrame
        Must provide an 'effective runtime' column; the row is selected by
        the position of ``movie`` in ``movieList``.
    movieList : list
        Movie names, in the same order as the rows of ``movieRuntimeDf``.
    movie : str
        Movie to process.
    framesPerInterval : int, optional
        Number of consecutive values grouped into one interval (default 30).

    Returns
    -------
    tuple of three lists
        (red, green, blue). Each holds 'effective runtime' lists of up to
        ``framesPerInterval`` values; intervals past the end of the data
        are shorter or empty.
    """
    #look the interval count up once instead of once per channel
    intervalCount = int(movieRuntimeDf.loc[movieList.index(movie), 'effective runtime'])

    def _groupIntoIntervals(values):
        return [values[framesPerInterval*index:framesPerInterval*(index+1)]
                for index in range(0, intervalCount)]

    #split into RGB layers
    red = [colourValue[0] for colourValue in colour]
    green = [colourValue[1] for colourValue in colour]
    blue = [colourValue[2] for colourValue in colour]

    #group colour values as instances
    return _groupIntoIntervals(red), _groupIntoIntervals(green), _groupIntoIntervals(blue)

def processShade(shade,movieRuntimeDf,movieList,movie,framesPerInterval=30):
    """Group the shade values of one movie into fixed-size intervals.

    Parameters
    ----------
    shade : sequence
        One entry per frame; position 0 of each entry is read as the shade value.
    movieRuntimeDf : pandas.DataFrame
        Must provide an 'effective runtime' column; the row is selected by
        the position of ``movie`` in ``movieList``.
    movieList : list
        Movie names, in the same order as the rows of ``movieRuntimeDf``.
    movie : str
        Movie to process.
    framesPerInterval : int, optional
        Number of consecutive values grouped into one interval (default 30).

    Returns
    -------
    list of lists
        'effective runtime' lists of up to ``framesPerInterval`` values;
        intervals past the end of the data are shorter or empty.
    """
    intervalCount = int(movieRuntimeDf.loc[movieList.index(movie), 'effective runtime'])

    #extract shade values
    shadeValues = [shadeValue[0] for shadeValue in shade]

    #group shade values as instances
    return [shadeValues[framesPerInterval*index:framesPerInterval*(index+1)]
            for index in range(0, intervalCount)]

### Load movie runtimes and build the features

In [4]:
#read the movie runtime table (only the three columns listed below are loaded)
movieRuntimesPath = 'data/mounted/Numerical Data/movie_runtimes.csv'
movieRuntimeDf = pd.read_csv(
    movieRuntimesPath,
    usecols=['movie', 'runtime (mins)', 'effective runtime'],
)

#movie names in table order
movieList = list(movieRuntimeDf['movie'])

#will hold the feature dataframe of every movie, keyed by movie name
movieFeatureDict = {}

In [9]:
#load the pickled feature objects of every movie in movieList and assemble one
#feature dataframe per movie; movieFeatureDict is rebound to the returned dict
movieFeatureDict = generateFeatures(movieList, movieRuntimeDf)

### VOC output

In [46]:
#folder holding the processed VOC screening files
base_path = 'data/mounted/Processed VOC Screenings/'

#folder the combined feature/label csv files are written to
save_url = "data//mounted//Features & Label Csvs//"

In [61]:
#pair every processed VOC screening with the features of its movie and write the result out
voc_screenings = os.listdir(r'data/mounted/Processed VOC Screenings')
for screening_name in voc_screenings:
    #the movie name is the file name without its last three "-"-separated fields
    movie = "-".join(screening_name.split("-")[:-3])
    features = movieFeatureDict[movie]

    #read the screening and discard the saved index column and the time column
    screening = pd.read_csv("data//mounted//Processed VOC Screenings//" + screening_name)
    screening = screening.drop(columns=['Unnamed: 0', 'time'])

    #put features and VOC values side by side, aligned on the row index;
    #ignore_index=True with axis=1 renumbers the columns 0..n-1
    instance_df = pd.concat([features, screening], axis=1, ignore_index=True)

    #write out the csv
    #NOTE(review): if screening_name already ends in ".csv" the output gets a double extension - confirm
    output_path = save_url + screening_name + ".csv"
    instance_df.to_csv(output_path)

    

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,811,812,813,814,815,816,817,818,819,820
0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,...,0.026795,0.021116,0.021187,0.488460,0.183028,0.118955,0.284463,0.106857,0.136891,0.044089
1,5.0,11.0,16.0,19.0,15.0,13.0,12.0,17.0,16.0,19.0,...,0.025571,0.021022,0.020677,0.490072,0.179206,0.120192,0.275978,0.105354,0.137743,0.038203
2,26.0,24.0,26.0,25.0,25.0,21.0,18.0,21.0,21.0,24.0,...,0.028257,0.020310,0.021309,0.477436,0.176073,0.112718,0.267345,0.101818,0.136309,0.043074
3,5.0,5.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,...,0.025823,0.021696,0.018821,0.491325,0.173187,0.117720,0.273987,0.101864,0.130802,0.042813
4,15.0,13.0,11.0,9.0,7.0,6.0,4.0,3.0,2.0,1.0,...,0.028563,0.020165,0.024099,0.497802,0.171847,0.116037,0.270119,0.102968,0.135821,0.044662
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
248,68.0,93.0,20.0,14.0,21.0,29.0,12.0,19.0,24.0,12.0,...,0.032862,0.024343,0.024551,0.334274,0.122282,0.080076,0.194036,0.075118,0.097756,0.028365
249,38.0,39.0,38.0,66.0,64.0,115.0,147.0,151.0,137.0,131.0,...,0.031384,0.022806,0.025800,0.333106,0.119089,0.082255,0.198748,0.071494,0.099826,0.029514
250,37.0,36.0,35.0,35.0,35.0,39.0,47.0,59.0,83.0,96.0,...,0.032948,0.024128,0.024806,0.353416,0.124794,0.082435,0.188219,0.070498,0.098239,0.029765
251,124.0,131.0,130.0,116.0,106.0,107.0,107.0,57.0,57.0,53.0,...,0.033974,0.022638,0.025842,0.381679,0.141599,0.091683,0.221000,0.078768,0.109291,0.038279


## Save the features and labels as CSVs

In [12]:
#load the warped screenings
#warpedScreeningDict contains each movie. Each element contains a dict of the movie screenings and the DTW paths
#(the with-block closes the file once the object has been read)
with open("disk/Pickle Objects/warpedMovies.p", 'rb') as warpedFile:
    warpedScreeningDict = pickle.load(warpedFile)

#get list of movies; this rebinds movieList, which earlier held the names from the runtime table
movieList = list(warpedScreeningDict.keys())

#NOTE(review): createFeatureLabelDf is not defined in the visible part of this notebook -
#confirm it is defined or imported before this cell runs on a fresh kernel
createFeatureLabelDf(movieList,warpedScreeningDict,movieFeatureDict, True)

Buddy
Help, I Shrunk My Teacher
Hobbit 2
I'm Off Then
Machete Kills
Paranormal Activity
Star Wars-The Force Awakens
Suck Me Shakespeer
The Hunger Games-Catching Fire
Walking with Dinosaurs
Walter Mitty
