In [1]:
import pandas as pd
import numpy as np
import scipy.signal as ss
import math
import datetime
import pickle
import random
import copy
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#user macros (tunable parameters of the alignment pipeline)
#threshold on the change between consecutive normalised CO2 peaks; gradientAlignment cuts a
#screening at the peak preceding the first change that falls below this value
gradThreshold = -0.045
#extra minutes added to a movie's runtime when cutting the first-pass VOC window
#(preliminaryAlignment doubles the sum to turn minutes into 30 s samples)
preliminaryAlignmentTolerance = 50
filledPercentageConstraint = 10 #a screening must be more than 10% filled (filter uses '>') to get a decent reading

#folder that holds the input CSV files; file names are appended directly, so keep the trailing '/'
numerical_data_base_path = 'data/mounted/Numerical Data/'

In [3]:
#VOC sensor readings; the leftover "Unnamed: 0" column from the CSV export is discarded
vocPath = numerical_data_base_path + 'VOC_30sec.csv'
vocDf = pd.read_csv(vocPath).drop(columns="Unnamed: 0")

#screening schedule (start time, movie title and how full the room was)
movieScreeningsPath = numerical_data_base_path + 'screening_times.csv'
movingScreeningsDf = pd.read_csv(
    movieScreeningsPath,
    usecols=['scheduled', 'movie', 'filled %'],
)

#per-movie runtimes; movieList holds the titles in the same row order as movieRuntimeDf
movieRuntimesPath = numerical_data_base_path + 'movie_runtimes.csv'
movieRuntimeDf = pd.read_csv(
    movieRuntimesPath,
    usecols=['movie', 'effective runtime', 'runtime (mins)'],
)
movieList = movieRuntimeDf['movie'].tolist()

#only the timestamp and CO2 reading are needed for the alignment
co2Df = vocDf[['time', 'co2']]

In [4]:
#keep only the screenings that can be used for alignment:
#  - the movie has an entry in the runtime table
#  - the room was more than filledPercentageConstraint percent full
isKnownMovie = movingScreeningsDf['movie'].isin(movieList)
isFilledEnough = movingScreeningsDf['filled %'] > filledPercentageConstraint
movingScreeningsDf = movingScreeningsDf.loc[isKnownMovie & isFilledEnough]
movingScreeningsDf.shape

(130, 3)

In [5]:
#plain Python lists handed to the alignment helpers
#(the screening lists are row-aligned with the filtered movingScreeningsDf)
scheduledTimeList = list(movingScreeningsDf['scheduled'])
movieScreeningList = list(movingScreeningsDf['movie'])
filledPercentageList = list(movingScreeningsDf['filled %'])
#movie titles in the row order of movieRuntimeDf, and every VOC timestamp in row order
movieList = list(movieRuntimeDf['movie'])
vocTimeList = list(co2Df['time'])

In [40]:
#NOTE: dataAlignment, errorAdjustment, normalisation and hobbit_intermission_fix are defined in the
#helper cells further down this notebook; those cells must have been run before this one,
#otherwise the calls below raise NameError on a fresh kernel.

#data alignment: cut one CO2 window per usable screening
#(returns the first-pass windows, the matched start times / movie titles and the trimmed windows)
originalVOCFrames,timeList,matchedMovieList,vocScreenings = dataAlignment(scheduledTimeList, movieScreeningList, movieList, vocTimeList, co2Df, preliminaryAlignmentTolerance, gradThreshold, movieRuntimeDf)

#error adjustment: re-cut the screenings whose end time was fixed by hand after inspection
adjustedScreenings = errorAdjustment(vocScreenings, timeList, matchedMovieList,originalVOCFrames,movieRuntimeDf)

#normalised vocs: one min-max scaled 1-D array per screening
normalisedScreenings =  normalisation(adjustedScreenings)

#intermission fix: interpolate over the break in the two listed Hobbit 2 screenings
normalisedScreenings = hobbit_intermission_fix(normalisedScreenings,timeList)

In [58]:
#export every adjusted screening: look up its rows in the full VOC table (all sensor columns)
#and write them to "<movie>-<scheduled time>" inside the processed-screenings folder
#NOTE(review): the file name has no extension and contains ':' from the timestamp, which some
#file systems reject - confirm before running on another platform
for index, adjustedScreening in enumerate(adjustedScreenings):
    movie, time = matchedMovieList[index], timeList[index]
    screening_df = vocDf.loc[adjustedScreening.index]
    screening_df.to_csv("data/mounted/Processed VOC Screenings/" + movie + "-" + time)

### Helper functions (run these cells before the pipeline cells above)

In [6]:
def hobbit_intermission_fix(normalisedScreenings,timeList):
    """Interpolate over the intermission in the two affected Hobbit 2 screenings.

    For each affected screening the samples in ``[start, end)`` are overwritten
    with evenly spaced values running from the sample just before the break
    (``start - 1``) to the sample at ``end + 1``.

    Parameters
    ----------
    normalisedScreenings : list of 1-D numpy arrays, row-aligned with ``timeList``.
        The affected arrays are modified in place.
    timeList : list of scheduled start times (strings) of the screenings.

    Returns
    -------
    The same ``normalisedScreenings`` list that was passed in.

    Screenings that are not present in ``timeList`` (for example because they
    were filtered out upstream) are skipped instead of raising ``ValueError``.
    """
    #scheduled start -> (first sample of the break, end of the replaced slice (exclusive))
    intermissionWindows = {
        '18-12-2013 14:00': (109, 123),
        '18-12-2013 19:00': (118, 132),
    }

    for timing, (start_time_idx, end_time_idx) in intermissionWindows.items():
        if timing not in timeList:
            #nothing to repair when the screening did not survive filtering/alignment
            continue

        screening = normalisedScreenings[timeList.index(timing)]

        start_pt = screening[start_time_idx-1]
        end_pt = screening[end_time_idx+1]
        #NOTE(review): linspace includes both anchor values, so the first and last replaced
        #samples repeat the anchors and the sample at end_time_idx itself is left untouched;
        #kept exactly as before - confirm this is the intended interpolation
        replacement_values = np.linspace(start_pt,end_pt,num=end_time_idx-start_time_idx)
        screening[start_time_idx:end_time_idx] = replacement_values

    return normalisedScreenings

In [12]:
def preliminaryAlignment(runtime, vocTime, vocTimeList,preliminaryAlignmentTolerance,co2Df):
    """Cut a first-pass CO2 window that starts at a screening's scheduled time.

    runtime and preliminaryAlignmentTolerance are added together and doubled to
    turn minutes into a number of 30 s samples. The window starts at the
    position of vocTime inside vocTimeList and spans that many index labels of
    co2Df (label-based slice, both ends included).

    Raises ValueError when vocTime is not in vocTimeList.
    """
    #two 30 s samples per minute
    sampleCount = 2 * (runtime + preliminaryAlignmentTolerance)
    firstLabel = vocTimeList.index(vocTime)
    lastLabel = firstLabel + sampleCount
    #NOTE(review): the list position is used as an index label, which presumably relies on
    #co2Df keeping its default 0..n-1 index - confirm
    return co2Df.loc[firstLabel:lastLabel]

def calculateDeltaBetweenPeaks(vocWindow):
    """Return the changes between consecutive normalised CO2 peaks of a window.

    Peaks are located with scipy's find_peaks on the 'co2' column. Each peak
    value is divided by the largest peak value and the difference between every
    peak and its predecessor is collected.

    Returns (deltaList, peakList): the differences (one fewer than the number of
    peaks) and the raw, un-normalised peak values. Both are empty lists when no
    peak is found.
    """
    co2Values = vocWindow[:]['co2'].values

    #find_peaks returns (positions, properties); only the positions are needed
    peakPositions = ss.find_peaks(co2Values)[0]
    if len(peakPositions) == 0:
        return list(), list()

    peakList = [co2Values[position] for position in peakPositions]

    #scale every peak by the tallest one
    tallestPeak = max(peakList)
    scaledPeaks = [peakValue / tallestPeak for peakValue in peakList]

    #difference between each peak and the one before it
    deltaList = [current - previous for previous, current in zip(scaledPeaks, scaledPeaks[1:])]

    return deltaList, peakList

def gradientAlignment(vocWindow,gradThreshold, effectiveRuntime,vocTime,movieMatched):
    """Trim a first-pass CO2 window down to one screening of effectiveRuntime rows.

    The changes between consecutive normalised peaks (see
    calculateDeltaBetweenPeaks) are inspected in roughly the last 20% of the
    peak sequence:

    * if none of them is below gradThreshold, the window is cut so that it ends
      at the last peak;
    * otherwise it is cut so that it ends at the peak that precedes the first
      change below gradThreshold, provided the resulting start position is
      positive (if not, the next qualifying change is tried).

    Parameters
    ----------
    vocWindow : DataFrame with a 'co2' column (the first-pass window).
    gradThreshold : threshold applied to the peak-to-peak changes.
    effectiveRuntime : number of rows the trimmed window should span.
    vocTime, movieMatched : not used by this function.

    Returns
    -------
    A list holding the trimmed window, or an empty list when the window has
    fewer than two peaks or no acceptable cut was found.
    """
    vocList = list()

    deltaList, peakList = calculateDeltaBetweenPeaks(vocWindow)
    if len(deltaList) == 0:
        #fewer than two peaks: there is no peak-to-peak change to align on
        return vocList

    co2Values = list(vocWindow[:]['co2'].values)

    #only check the last 20% of the peak-to-peak changes. With just one or two changes
    #round() lands on len(deltaList), which used to leave an empty window and make min()
    #raise ValueError; clamp so that the final change is always inspected.
    frontIndex = min(round(len(deltaList)*0.8), len(deltaList) - 1)
    vocConstraintWindow = deltaList[frontIndex:]

    if min(vocConstraintWindow) > gradThreshold:
        #no drop below the threshold: end the window at the last peak
        #NOTE(review): .index() returns the first row holding the peak's VALUE, which may be
        #an earlier row if the value repeats; firstIndex is also not checked for being
        #negative here, unlike the branch below - left unchanged, confirm
        lastPeakIndex = co2Values.index(peakList[-1])
        firstIndex = lastPeakIndex - effectiveRuntime
        vocList.append(vocWindow[firstIndex:lastPeakIndex][:])
    else:
        #end the window at the peak that starts the first change below the threshold
        for grad in vocConstraintWindow:
            if grad < gradThreshold:
                #NOTE(review): both lookups are by value (first occurrence), see note above
                gradIndex = deltaList.index(grad)
                associatedPeak = peakList[gradIndex]
                endIndex = co2Values.index(associatedPeak)

                firstIndex = endIndex - effectiveRuntime

                if firstIndex > 0: #positive index
                    vocList.append(vocWindow[firstIndex:endIndex][:])
                    break

    return vocList

def dataAlignment(scheduledTimeList, movieScreeningList, movieList, vocTimeList, co2Df, preliminaryAlignmentTolerance, gradThreshold, movieRuntimeDf):
    """Cut one CO2 window for every screening that can be aligned.

    A screening is processed when its scheduled time appears in vocTimeList, has
    not been matched already, and its movie is in movieList. The movie's row in
    movieRuntimeDf (looked up by its position in movieList) supplies the
    'runtime (mins)' for the first-pass window and the 'effective runtime' for
    the gradient-based trim. Screenings for which the trim yields nothing are
    dropped.

    Returns (originalVOCFrames, timeList, matchedMovieList, vocScreenings):
    the first-pass windows, scheduled times and movie titles of the kept
    screenings, and the trimmed windows.
    """
    originalVOCFrames, timeList, matchedMovieList, vocScreenings = [], [], [], []

    for position, scheduled in enumerate(scheduledTimeList):
        #skip screenings without sensor data and start times that were already matched
        if scheduled not in vocTimeList or scheduled in timeList:
            continue

        candidateMovie = movieScreeningList[position]
        if candidateMovie not in movieList:
            continue

        runtimeRow = movieRuntimeDf.loc[movieList.index(candidateMovie)]
        firstPassWindow = preliminaryAlignment(
            runtimeRow['runtime (mins)'], scheduled, vocTimeList,
            preliminaryAlignmentTolerance, co2Df)
        trimmedWindows = gradientAlignment(
            firstPassWindow, gradThreshold, runtimeRow['effective runtime'],
            scheduled, candidateMovie)

        if len(trimmedWindows) == 0:
            continue

        originalVOCFrames.append(firstPassWindow)
        timeList.append(scheduled)
        matchedMovieList.append(candidateMovie)
        vocScreenings.extend(trimmedWindows)

    return originalVOCFrames,timeList,matchedMovieList,vocScreenings


In [34]:
def _trimFrameToEnd(vocFrame, endOfMovie, effectiveRuntime):
    """Return the effectiveRuntime rows of vocFrame that end just before the endOfMovie row."""
    #position of the first row whose 'time' equals the hand-picked end of the movie
    vocEndIndex = list(vocFrame[:]['time'].values).index(endOfMovie)
    vocStartIndex = vocEndIndex - effectiveRuntime
    #NOTE(review): a negative start would wrap around to the end of the frame - not guarded
    return vocFrame[vocStartIndex:vocEndIndex][:]


def errorAdjustment(vocList, timeList, matchedMovieList,originalVOCFrames,movieRuntimeDf):
    """Re-cut the screenings whose end time was corrected by hand after inspection.

    For every entry of the correction table below, the screening's first-pass
    window (originalVOCFrames) is cut so that it ends at the hand-picked end
    time and spans the movie's 'effective runtime' rows; the result replaces
    the automatically aligned window.

    Parameters
    ----------
    vocList : automatically aligned windows, row-aligned with timeList.
    timeList : scheduled start times of the aligned screenings.
    matchedMovieList : movie title of every aligned screening.
    originalVOCFrames : first-pass windows of the aligned screenings.
    movieRuntimeDf : runtime table with 'movie' and 'effective runtime' columns.

    Returns
    -------
    A deep copy of vocList with the corrected windows substituted; vocList
    itself is not modified.

    A table entry whose start time is missing from timeList is skipped with a
    warning instead of raising ValueError. A ValueError is still raised when
    the hand-picked end time does not occur in the screening's first-pass
    window.
    """
    import warnings

    starWars = 'Star Wars-The Force Awakens'
    offThen = 'I\'m Off Then'

    #scheduled start -> (observed end of the movie, title whose runtime is used)
    #a title of None means "use the movie matched to the screening"
    manualEndTimes = {
        #2013 movies
        '29-12-2013 19:30': ('29-12-2013 21:28', None),
        '02-01-2014 17:15': ('02-01-2014 19:21', None),
        '05-01-2014 17:15': ('05-01-2014 19:21', None),
        '09-01-2014 20:35': ('09-01-2014 22:28', None),
        '10-01-2014 16:30': ('10-01-2014 19:50', None),
        '10-01-2014 22:35': ('11-01-2014 00:25', None),
        #Star Wars
        '22-12-2015 22:30': ('23-12-2015 00:55', starWars),
        '28-12-2015 22:30': ('29-12-2015 00:59', starWars),
        '29-12-2015 22:30': ('30-12-2015 01:15', starWars),
        #I'm Off Then
        '27-12-2015 20:00': ('27-12-2015 21:54', offThen),
        '30-12-2015 20:00': ('30-12-2015 21:52', offThen),
        '31-12-2015 20:00': ('31-12-2015 21:53', offThen),
        '02-01-2016 17:30': ('02-01-2016 19:22', offThen),
        '02-01-2016 20:00': ('02-01-2016 21:53', offThen),
        '03-01-2016 17:30': ('03-01-2016 19:17', offThen),
    }

    #Corrections that existed as code branches but were never applied, because their start
    #times were not in the list of dates to fix. Kept here for reference only:
    #   27-12-2013 13:15 -> 27-12-2013 15:59  (matched movie)
    #   05-01-2014 13:45 -> 05-01-2014 16:21  (matched movie)
    #   07-01-2014 13:45 -> 07-01-2014 16:10  (matched movie)
    #   30-12-2015 11:30 -> 30-12-2015 13:26  ('Help, I Shrunk My Teacher')
    #   02-01-2016 11:30 -> 02-01-2016 13:23  ('Help, I Shrunk My Teacher', start shifted by +1)
    #   03-01-2016 11:30 -> 03-01-2016 13:23  ('Help, I Shrunk My Teacher')

    adjustedVOCList = copy.deepcopy(vocList)

    movieList = list(movieRuntimeDf['movie'])

    for errorDate, (endOfMovie, runtimeMovie) in manualEndTimes.items():

        if errorDate not in timeList:
            #the screening was filtered out or could not be aligned, so there is nothing to fix
            warnings.warn('errorAdjustment: screening ' + errorDate + ' is not in timeList; manual adjustment skipped')
            continue

        errorIndex = timeList.index(errorDate)
        if runtimeMovie is None:
            runtimeMovie = matchedMovieList[errorIndex]

        movieIndex = movieList.index(runtimeMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']

        adjustedVOCList[errorIndex] = _trimFrameToEnd(originalVOCFrames[errorIndex], endOfMovie, effectiveRuntime)

    return adjustedVOCList

In [10]:
def normalisation(vocScreenings):
    """Min-max scale the CO2 values of every screening independently.

    Each element of vocScreenings must expose a 'co2' column. A fresh
    MinMaxScaler is fitted per screening, so every screening is scaled between
    0 and 1 on its own range.

    Returns a list of flat (1-D) arrays, one per screening, in input order.
    """
    def _scaleScreening(screening):
        #the scaler expects a 2-D column; flatten again afterwards
        co2Column = screening['co2'].values.reshape(-1, 1)
        return MinMaxScaler().fit_transform(co2Column).ravel()

    return [_scaleScreening(screening) for screening in vocScreenings]