In [69]:
import pandas as pd
import numpy as np
import scipy.signal as ss
import math
import datetime
from dtw import dtw 
import pickle
import random
import copy

import matplotlib.pyplot as plt
%matplotlib inline

In [70]:
#read in the various csvs

#2013 Dataset
vocPath = 'Numerical Data/CO2data.csv'
co2Df = pd.read_csv(vocPath, usecols = ['Time','CO2'], header = 0, nrows = 74208)
movieScreeningsPath = 'Numerical Data/screening_times.csv'
movingScreeningsDf = pd.read_csv(movieScreeningsPath, usecols = ['scheduled','movie','filled %'])
movieRuntimesPath = 'Numerical Data/movie_runtimes.csv'
movieRuntimeDf = pd.read_csv(movieRuntimesPath, usecols = ['movie', 'runtime (mins)', 'effective runtime'])

#2015 Dataset
starWarsPath = 'Numerical Data/Star Wars-The Force Awakens.csv'
starWarsScreeningDf = pd.read_csv(starWarsPath)
imOffThenPath = 'Numerical Data/I\'m Off Then.csv'
imOffThenScreeningDf = pd.read_csv(imOffThenPath)
helpIShrunkTheTeacherPath = 'Numerical Data/Help, I\'ve Shrunk The Teacher.csv'
helpIShrunkTheTeacherScreeningDf = pd.read_csv(helpIShrunkTheTeacherPath)
vocPath = 'Numerical Data/final_data_ppb.csv'
cinestar2015Co2Df = pd.read_csv(vocPath, usecols = ['Time', 'CO2'])

In [71]:
#VOC DATAFRAME

#VOC timings with datetime object
for i in range(0,co2Df.shape[0]):
    vocTime = co2Df.loc[i]['Time']
    vocTime = vocTime[1:len(vocTime)-1]
    date = datetime.datetime.strptime(vocTime, "%m/%d/%Y %H:%M:%S")
    co2Df.at[i,'Time'] = date.strftime('%d-%m-%Y %H:%M')

for i in range(0, cinestar2015Co2Df.shape[0]):
    vocTime = cinestar2015Co2Df.loc[i]['Time']
    date = datetime.datetime.strptime(vocTime, "%d/%m/%Y %H:%M")
    cinestar2015Co2Df.at[i,'Time'] = date.strftime('%d-%m-%Y %H:%M')

In [72]:
#MOVIE SCREENINGS
#change scheduled time into standardised format

#2013
for i in range(0,movingScreeningsDf.shape[0]):
    scheduledTime = movingScreeningsDf.loc[i]['scheduled']
    scheduledTimeObj = datetime.datetime.strptime(scheduledTime, "%d/%m/%Y %H:%M")
    scheduledTime = scheduledTimeObj.strftime('%d-%m-%Y %H:%M')
    movingScreeningsDf.at[i,'scheduled'] = scheduledTime
    
#2015 Star Wars
for i in range(0, starWarsScreeningDf.shape[0]):
    beginTime = starWarsScreeningDf.loc[i]['Start']
    beginTimeObj = datetime.datetime.strptime(beginTime,  "%d/%m/%Y %H:%M")
    beginTime = beginTimeObj.strftime('%d-%m-%Y %H:%M')
    starWarsScreeningDf.at[i,'Start'] = beginTime

#2015 I'm Off Then
for i in range(0, imOffThenScreeningDf.shape[0]):
    beginTime = imOffThenScreeningDf.loc[i]['Start']
    beginTimeObj = datetime.datetime.strptime(beginTime,  "%d/%m/%Y %H:%M")
    beginTime = beginTimeObj.strftime('%d-%m-%Y %H:%M')
    imOffThenScreeningDf.at[i,'Start'] = beginTime
    
#2015 Help, I Shrunk The Teacher
for i in range(0, helpIShrunkTheTeacherScreeningDf.shape[0]):
    beginTime = helpIShrunkTheTeacherScreeningDf.loc[i]['Start']
    beginTimeObj = datetime.datetime.strptime(beginTime,  "%d/%m/%Y %H:%M")
    beginTime = beginTimeObj.strftime('%d-%m-%Y %H:%M')
    helpIShrunkTheTeacherScreeningDf.at[i,'Start'] = beginTime

### Peak Detection Algorithm

1. Align the VOC frames using the scheduled times and add some tolerance at the end 
2. Go to the scheduled end within the voc frame
3. check for the last peak/last crest

## Data Alignment

In [73]:
#2013 Alignment 

vocList = list()
scheduledTimeList = [x for x in movingScreeningsDf.loc[:]['scheduled']]
movieScreeningList = [x for x in movingScreeningsDf.loc[:]['movie']]
movieList = [x for x in movieRuntimeDf.loc[:]['movie']]
vocTimeList = [x for x in co2Df.loc[:]['Time']]
filledPercentageList = [x for x in movingScreeningsDf.loc[:]['filled %']]
matchedMovieList = list()
timeList = list()

gradThreshold = -0.045

skipSecondInterval = False
originalVOCFrames = list()

for vocTime in vocTimeList:
    
    try:
        timeIndex = scheduledTimeList.index(vocTime)
    except:
        continue 
        
    if vocTime not in timeList:

        if filledPercentageList[timeIndex] > 10: #only use well filled movies

            #Preliminary Alignment 
            movieMatched = movieScreeningList[timeIndex]    #find matched movie
            try:
                movieIndex = movieList.index(movieMatched)      #find runtime of matched movie
            except ValueError:
                continue
                
            runtime = movieRuntimeDf.loc[movieIndex]['runtime (mins)']
            effectiveRuntime = (runtime + 50) * 2 #tolerance added is 15mins and then multiplied by 2 to get the number of 30s intervals
            vocIndex = vocTimeList.index(vocTime)
            vocEndIndex = vocIndex + effectiveRuntime
            vocWindow = co2Df.loc[vocIndex:vocEndIndex][:]
            originalVocFrame = co2Df.loc[vocIndex:vocEndIndex][:]

            #Delta Gradient Alignment 
            peakList = list()
            peakTimeList = list()
            normalisedPeakList = list()
            #find_peaks returns the index values of the peaks within the VOC frame 
            peaks = ss.find_peaks(vocWindow[:]['CO2'].values)
            #Using the index values find the actual values of the peaks 
            if len(peaks[0]) != 0:
                for peakIndex in peaks[0]:
                    peakList.append(vocWindow[:]['CO2'].values[peakIndex])

                #normalise the peaks (divide by highest VOC value)
                maxPeak = max(peakList)
                for peakValue in peakList:
                    normalisedPeakList.append(peakValue/maxPeak) 

                #calculate the gradient and distance between peaks
                #the gradientList and distanceList for vocFrame
                gradientList = list()
                distanceList = list()
                for peakIndex in range(1, len(normalisedPeakList)):
                    prevPeak = normalisedPeakList[peakIndex-1]
                    currPeak = normalisedPeakList[peakIndex]
                    grad = currPeak - prevPeak
                    dist = math.sqrt((currPeak - prevPeak)**2 + 1)
                    gradientList.append(grad)
                    distanceList.append(dist)

                #apply constraints to trim the voc window
                frontIndex = round(len(gradientList)*0.8) #only check the last 25% of the voc window for the delta constraint
                vocGradWindow = gradientList[frontIndex:]

                if min(vocGradWindow) > gradThreshold:

                    #if the min gradient in the frame is larger than the threshold then just cut off the last peak
                    lastPeakIndex = list(vocWindow[:]['CO2'].values).index(peakList[-1])
                    #find first index from taking off the effective runtime
                    movieIndex = movieList.index(movieMatched)
                    effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
                    firstIndex = lastPeakIndex - effectiveRuntime 
                    vocWindow = vocWindow[firstIndex:lastPeakIndex][:]
                    vocList.append(vocWindow)
                    timeList.append(vocTime)
                    matchedMovieList.append(movieMatched)
                    originalVOCFrames.append(originalVocFrame)

                else: 

                    #if min gradient in frame is less than threshold then cut off the peak that starts that gradient
                    #find the first grad that is lower than the threshold

                    for grad in vocGradWindow:
                        if grad < gradThreshold:
                            gradIndex = gradientList.index(grad)
                            associatedPeak = peakList[gradIndex]
                            endIndex = list(vocWindow[:]['CO2'].values).index(associatedPeak)

                            movieIndex = movieList.index(movieMatched)
                            effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
                            firstIndex = endIndex - effectiveRuntime

                            if firstIndex > 0: #positive index

                                vocWindow = vocWindow[firstIndex:endIndex][:]
                                vocList.append(vocWindow)
                                timeList.append(vocTime)
                                matchedMovieList.append(movieMatched)
                                originalVOCFrames.append(originalVocFrame) 
                                break




In [74]:
#2015 Star Wars Alignment  

vocStarWars = list()
scheduledTimeStarWars = [x for x in starWarsScreeningDf.loc[:]['Start']]
vocTimeList = [x for x in cinestar2015Co2Df.loc[:]['Time']]
filledPercentageStarWars = [x for x in starWarsScreeningDf.loc[:]['filled %']]
movieList = [x for x in movieRuntimeDf.loc[:]['movie']]
timeListStarWars = list()

gradThreshold = -0.045

skipSecondInterval = False

for vocTime in vocTimeList:
    try:
        timeIndex = scheduledTimeStarWars.index(vocTime)
    except:
        continue 
        
    if vocTime not in timeListStarWars:

        if filledPercentageStarWars[timeIndex] > 10: #only use well filled movies
            #Preliminary Alignment 
            movieIndex = movieList.index('Star Wars-The Force Awakens')
            runtime = movieRuntimeDf.loc[movieIndex]['runtime (mins)']
            effectiveRuntime = (runtime + 50) * 2 #tolerance added is 15mins and then multiplied by 2 to get the number of 30s intervals
            vocIndex = vocTimeList.index(vocTime)
            vocEndIndex = vocIndex + effectiveRuntime
            vocWindow = cinestar2015Co2Df.loc[vocIndex:vocEndIndex][:]
            originalVocFrame = cinestar2015Co2Df.loc[vocIndex:vocEndIndex][:]

            #Delta Gradient Alignment 
            peakList = list()
            peakTimeList = list()
            normalisedPeakList = list()
            #find_peaks returns the index values of the peaks within the VOC frame 
            peaks = ss.find_peaks(vocWindow[:]['CO2'].values)
            #Using the index values find the actual values of the peaks 
            if len(peaks[0]) != 0:
                for peakIndex in peaks[0]:
                    peakList.append(vocWindow[:]['CO2'].values[peakIndex])

                #normalise the peaks (divide by highest VOC value)
                maxPeak = max(peakList)
                for peakValue in peakList:
                    normalisedPeakList.append(peakValue/maxPeak) 

                #calculate the gradient and distance between peaks
                #the gradientList and distanceList for vocFrame
                gradientList = list()
                distanceList = list()
                for peakIndex in range(1, len(normalisedPeakList)):
                    prevPeak = normalisedPeakList[peakIndex-1]
                    currPeak = normalisedPeakList[peakIndex]
                    grad = currPeak - prevPeak
                    dist = math.sqrt((currPeak - prevPeak)**2 + 1)
                    gradientList.append(grad)
                    distanceList.append(dist)

                #apply constraints to trim the voc window
                frontIndex = round(len(gradientList)*0.8) #only check the last 25% of the voc window for the delta constraint
                vocGradWindow = gradientList[frontIndex:]

                if min(vocGradWindow) > gradThreshold:

                    #if the min gradient in the frame is larger than the threshold then just cut off the last peak
                    lastPeakIndex = list(vocWindow[:]['CO2'].values).index(peakList[-1])
                    #find first index from taking off the effective runtime
                    movieIndex = movieList.index('Star Wars-The Force Awakens')
                    effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
                    firstIndex = lastPeakIndex - effectiveRuntime
                    vocWindow = vocWindow[firstIndex:lastPeakIndex][:]
                    vocStarWars.append(vocWindow)
                    timeListStarWars.append(vocTime)
                    timeList.append(vocTime)
                    originalVOCFrames.append(originalVocFrame)

                else: 


                    for grad in vocGradWindow:
                        if grad < gradThreshold:
                            gradIndex = gradientList.index(grad)
                            associatedPeak = peakList[gradIndex]
                            endIndex = list(vocWindow[:]['CO2'].values).index(associatedPeak)

                            movieIndex = movieList.index('Star Wars-The Force Awakens')
                            effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
                            firstIndex = endIndex - effectiveRuntime

                            if firstIndex > 0: #positive index

                                vocWindow = vocWindow[firstIndex:endIndex][:]
                                vocStarWars.append(vocWindow)
                                timeListStarWars.append(vocTime)
                                timeList.append(vocTime)
                                originalVOCFrames.append(originalVocFrame)  
                                break





In [75]:
#2015 I'm Off Then Alignment

vocImOffThen = list()
vocTimeList = [x for x in cinestar2015Co2Df.loc[:]['Time']]
scheduledTimeImOffThen = [x for x in imOffThenScreeningDf.loc[:]['Start']]
filledPercentageImOffThen = [x for x in imOffThenScreeningDf.loc[:]['filled %']]
timeListImOffThen = list()
gradThreshold = -0.045

skipSecondInterval = False

for vocTime in vocTimeList:
    try:
        timeIndex = scheduledTimeImOffThen.index(vocTime)
    except:
        continue 

    if vocTime not in timeListImOffThen:
        if filledPercentageImOffThen[timeIndex] > 10: #only use well filled movies
            #Preliminary Alignment 
            movieIndex = movieList.index('I\'m Off Then')
            runtime = movieRuntimeDf.loc[movieIndex]['runtime (mins)']
            effectiveRuntime = (runtime + 50) * 2 #tolerance added is 15mins and then multiplied by 2 to get the number of 30s intervals
            vocIndex = vocTimeList.index(vocTime)
            vocEndIndex = vocIndex + effectiveRuntime
            vocWindow = cinestar2015Co2Df.loc[vocIndex:vocEndIndex][:]
            originalVocFrame = cinestar2015Co2Df.loc[vocIndex:vocEndIndex][:]

            #Delta Gradient Alignment 
            peakList = list()
            peakTimeList = list()
            normalisedPeakList = list()
            #find_peaks returns the index values of the peaks within the VOC frame 
            peaks = ss.find_peaks(vocWindow[:]['CO2'].values)
            #Using the index values find the actual values of the peaks 
            if len(peaks[0]) != 0:
                for peakIndex in peaks[0]:
                    peakList.append(vocWindow[:]['CO2'].values[peakIndex])

                #normalise the peaks (divide by highest VOC value)
                maxPeak = max(peakList)
                for peakValue in peakList:
                    normalisedPeakList.append(peakValue/maxPeak) 

                #calculate the gradient and distance between peaks
                #the gradientList and distanceList for vocFrame
                gradientList = list()
                distanceList = list()
                for peakIndex in range(1, len(normalisedPeakList)):
                    prevPeak = normalisedPeakList[peakIndex-1]
                    currPeak = normalisedPeakList[peakIndex]
                    grad = currPeak - prevPeak
                    dist = math.sqrt((currPeak - prevPeak)**2 + 1)
                    gradientList.append(grad)
                    distanceList.append(dist)

                #apply constraints to trim the voc window
                frontIndex = round(len(gradientList)*0.8) #only check the last 25% of the voc window for the delta constraint
                vocGradWindow = gradientList[frontIndex:]

                if min(vocGradWindow) > gradThreshold:

                    #if the min gradient in the frame is larger than the threshold then just cut off the last peak
                    lastPeakIndex = list(vocWindow[:]['CO2'].values).index(peakList[-1])
                    #find first index from taking off the effective runtime
                    movieIndex = movieList.index('I\'m Off Then')
                    effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
                    firstIndex = lastPeakIndex - effectiveRuntime
                    vocWindow = vocWindow[firstIndex:lastPeakIndex][:]

                    vocImOffThen.append(vocWindow)
                    timeListImOffThen.append(vocTime)
                    timeList.append(vocTime)
                    originalVOCFrames.append(originalVocFrame)

                else: 

                    #if min gradient in frame is less than threshold then cut off the peak that starts that gradient
                    #find the first grad that is lower than the threshold

                    for grad in vocGradWindow:
                        if grad < gradThreshold:
                            gradIndex = gradientList.index(grad)
                            associatedPeak = peakList[gradIndex]
                            endIndex = list(vocWindow[:]['CO2'].values).index(associatedPeak)

                            movieIndex = movieList.index('I\'m Off Then')
                            effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
                            firstIndex = endIndex - effectiveRuntime

                            if firstIndex > 0: #positive index
                                vocWindow = vocWindow[firstIndex:endIndex][:]

                                vocImOffThen.append(vocWindow)

                                timeListImOffThen.append(vocTime)
                                timeList.append(vocTime)
                                originalVOCFrames.append(originalVocFrame)

                                break






In [76]:
#2015 Help I Shrunk The Teacher

vocHelpIShrunk = list()
vocTimeList = [x for x in cinestar2015Co2Df.loc[:]['Time']]
scheduledTimeHelpIShrunk = [x for x in helpIShrunkTheTeacherScreeningDf.loc[:]['Start']]
filledPercentageHelpIShrunk = [x for x in helpIShrunkTheTeacherScreeningDf.loc[:]['filled %']]
timeListHelpIShrunk = list()
gradThreshold = -0.045

skipSecondInterval = False

for vocTime in vocTimeList:
    try:
        timeIndex = scheduledTimeHelpIShrunk.index(vocTime)
    except:
        continue
        
    if vocTime not in timeListHelpIShrunk:

        if filledPercentageHelpIShrunk[timeIndex] > 10: #only use well filled movies
            #Preliminary Alignment 
            movieIndex = movieList.index('Help, I\'ve Shrunk My Teacher')
            runtime = movieRuntimeDf.loc[movieIndex]['runtime (mins)']
            effectiveRuntime = (runtime + 50) * 2 #tolerance added is 15mins and then multiplied by 2 to get the number of 30s intervals
            vocIndex = vocTimeList.index(vocTime)
            vocEndIndex = vocIndex + effectiveRuntime
            vocWindow = cinestar2015Co2Df.loc[vocIndex:vocEndIndex][:]
            originalVocFrame = cinestar2015Co2Df.loc[vocIndex:vocEndIndex][:]


            #Delta Gradient Alignment 
            peakList = list()
            peakTimeList = list()
            normalisedPeakList = list()
            #find_peaks returns the index values of the peaks within the VOC frame 
            peaks = ss.find_peaks(vocWindow[:]['CO2'].values)
            #Using the index values find the actual values of the peaks 
            if len(peaks[0]) != 0:
                for peakIndex in peaks[0]:
                    peakList.append(vocWindow[:]['CO2'].values[peakIndex])

                #normalise the peaks (divide by highest VOC value)
                maxPeak = max(peakList)
                for peakValue in peakList:
                    normalisedPeakList.append(peakValue/maxPeak) 

                #calculate the gradient and distance between peaks
                #the gradientList and distanceList for vocFrame
                gradientList = list()
                distanceList = list()
                for peakIndex in range(1, len(normalisedPeakList)):
                    prevPeak = normalisedPeakList[peakIndex-1]
                    currPeak = normalisedPeakList[peakIndex]
                    grad = currPeak - prevPeak
                    dist = math.sqrt((currPeak - prevPeak)**2 + 1)
                    gradientList.append(grad)
                    distanceList.append(dist)

                #apply constraints to trim the voc window
                frontIndex = round(len(gradientList)*0.8) #only check the last 25% of the voc window for the delta constraint
                vocGradWindow = gradientList[frontIndex:]

                if min(vocGradWindow) > gradThreshold:

                    #if the min gradient in the frame is larger than the threshold then just cut off the last peak
                    lastPeakIndex = list(vocWindow[:]['CO2'].values).index(peakList[-1])
                    #find first index from taking off the effective runtime
                    movieIndex = movieList.index('Help, I\'ve Shrunk My Teacher')
                    effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
                    firstIndex = lastPeakIndex - effectiveRuntime
                    vocWindow = vocWindow[firstIndex:lastPeakIndex][:]
                    vocHelpIShrunk.append(vocWindow)
                    timeListHelpIShrunk.append(vocTime)
                    timeList.append(vocTime)
                    originalVOCFrames.append(originalVocFrame)

                else: 


                    #if min gradient in frame is less than threshold then cut off the peak that starts that gradient
                    #find the first grad that is lower than the threshold

                    for grad in vocGradWindow:
                        if grad < gradThreshold:
                            gradIndex = gradientList.index(grad)
                            associatedPeak = peakList[gradIndex]
                            endIndex = list(vocWindow[:]['CO2'].values).index(associatedPeak)

                            movieIndex = movieList.index('Help, I\'ve Shrunk My Teacher')
                            effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
                            firstIndex = endIndex - effectiveRuntime

                            if firstIndex > 0: #positive index

                                vocWindow = vocWindow[firstIndex:endIndex][:]
                                vocHelpIShrunk.append(vocWindow)
                                timeListHelpIShrunk.append(vocTime)
                                timeList.append(vocTime)
                                originalVOCFrames.append(originalVocFrame)
                                break





## Manual Editting

In [77]:
#2015 Manual Editting
#VOC Screenings to be manually editted after inspection

#Help I Shrunk 27-12-2015 11:30
#Help I Shrunk 30-12-2015 11:30
#Help I Shrunk 02-01-2016 11:30
#Help I Shrunk 03-01-2016 11:30

vocTimeList = [x for x in cinestar2015Co2Df.loc[:]['Time']]
vocHelpIShrunkAdjusted = copy.deepcopy(vocHelpIShrunk)

errorDates = ['27-12-2015 11:30', '30-12-2015 11:30', '02-01-2016 11:30', '03-01-2016 11:30']
for errorDate in errorDates:
    errorIndex = timeList.index(errorDate)
    index = timeListHelpIShrunk.index(errorDate)
    vocFrame = originalVOCFrames[errorIndex]
    if errorDate == '27-12-2015 11:30':
        endOfMovie = '27-12-2015 13:30'
        movieIndex = movieList.index('Help, I\'ve Shrunk My Teacher')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocHelpIShrunkAdjusted[index] = vocWindow
    
    elif errorDate == '30-12-2015 11:30':
        endOfMovie = '30-12-2015 13:26'
        movieIndex = movieList.index('Help, I\'ve Shrunk My Teacher')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocHelpIShrunkAdjusted[index] = vocWindow

    elif errorDate == '02-01-2016 11:30':
        endOfMovie = '02-01-2016 13:23'
        movieIndex = movieList.index('Help, I\'ve Shrunk My Teacher')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime + 1
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocHelpIShrunkAdjusted[index] = vocWindow
        
    elif errorDate == '03-01-2016 11:30':
        endOfMovie = '03-01-2016 13:23'
        movieIndex = movieList.index('Help, I\'ve Shrunk My Teacher')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocHelpIShrunkAdjusted[index] = vocWindow

In [78]:
#2015 Manual Editting
#VOC Screenings to be manually editted to after inspection

#I'm Off Then 27-12-2015 20:00
#I'm Off Then 30-12-2015 20:00
#I'm Off Then 31-12-2015 20:00
#I'm Off Then 02-01-2016 17:30
#I'm Off Then 02-01-2016 20:00
#I'm Off Then 03-01-2016 17:30
vocTimeList = [x for x in cinestar2015Co2Df.loc[:]['Time']]

vocImOffThenAdjusted = copy.deepcopy(vocImOffThen)

errorDates = ['27-12-2015 20:00', '30-12-2015 20:00', '31-12-2015 20:00', '02-01-2016 17:30','02-01-2016 20:00','03-01-2016 17:30']
for errorDate in errorDates:
    errorIndex = timeList.index(errorDate)
    index = timeListImOffThen.index(errorDate)
    vocFrame = originalVOCFrames[errorIndex]
    if errorDate == '27-12-2015 20:00':
        endOfMovie = '27-12-2015 21:54'
        movieIndex = movieList.index('I\'m Off Then')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocImOffThenAdjusted[index] = vocWindow
        
    elif errorDate == '30-12-2015 20:00':
        endOfMovie = '30-12-2015 21:52'
        movieIndex = movieList.index('I\'m Off Then')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocImOffThenAdjusted[index] = vocWindow
        
    elif errorDate == '31-12-2015 20:00':
        endOfMovie = '31-12-2015 21:53'
        movieIndex = movieList.index('I\'m Off Then')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocImOffThenAdjusted[index] = vocWindow
        
    elif errorDate == '02-01-2016 17:30':
        endOfMovie = '02-01-2016 19:22'
        movieIndex = movieList.index('I\'m Off Then')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocImOffThenAdjusted[index] = vocWindow
    
    elif errorDate == '02-01-2016 20:00':
        endOfMovie = '02-01-2016 21:53'
        movieIndex = movieList.index('I\'m Off Then')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocImOffThenAdjusted[index] = vocWindow

    elif errorDate == '03-01-2016 17:30':
        endOfMovie = '03-01-2016 19:17'
        movieIndex = movieList.index('I\'m Off Then')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocImOffThenAdjusted[index] = vocWindow


In [79]:
#2013 Manual Editting
#VOC Screenings to be manually editted to after inspection

#The Hunger Games: Catching Fire 27-12-2013 13:15
#Buddy 29-12-2013 19:30
#Walter Mitty 02-01-2014 17:15
#The Hunger Games: Catching Fire 05-01-2014 13:45
#Walter Mitty 05-01-2014 17:15
#The Hunger Games: Catching Fire 07-01-2014 13:45
#Paranormal Activity 09-01-2014 20:35
#Hobbit 2 10-01-2014 16:30
#Paranormal Activity 10-01-2014 22:35
errorList = ['27-12-2013 13:15', '29-12-2013 19:30', '02-01-2014 17:15', '05-01-2014 13:45',
             '05-01-2014 17:15', '07-01-2014 13:45', '09-01-2014 20:35', '10-01-2014 16:30', 
             '10-01-2014 22:35']
adjustedVOCList = copy.deepcopy(vocList)

for errorDate in errorList: 
    errorIndex = timeList.index(errorDate)
    matchedMovie = matchedMovieList[errorIndex]
    vocFrame = originalVOCFrames[errorIndex]
    
    if errorDate == '27-12-2013 13:15':
        endOfMovie = '27-12-2013 15:59' 
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow

    elif errorDate == '29-12-2013 19:30':
        endOfMovie = '29-12-2013 21:28' 
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow  

    elif errorDate == '02-01-2014 17:15':
        endOfMovie = '02-01-2014 19:21' 
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow  

    elif errorDate == '05-01-2014 13:45':
        endOfMovie = '05-01-2014 16:21' 
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow  
        
    elif errorDate == '05-01-2014 17:15':
        endOfMovie = '05-01-2014 19:21' 
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow  

    elif errorDate == '07-01-2014 13:45':
        endOfMovie = '07-01-2014 16:10' 
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime 
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow  
        
    elif errorDate == '09-01-2014 20:35':
        endOfMovie = '09-01-2014 22:28'
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow  

    elif errorDate == '10-01-2014 16:30':
        endOfMovie = '10-01-2014 19:50'
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow  
        
    elif errorDate == '10-01-2014 22:35':
        endOfMovie = '11-01-2014 00:25'
        movieIndex = movieList.index(matchedMovie)
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        adjustedVOCList[errorIndex] = vocWindow  
        

In [80]:
#2015 Manual Editting
#VOC Screenings to be manually editted to after inspection

#Star Wars-A Force Awakens 22-12-2015 22:30
#Star Wars-A Force Awakens 28-12-2015 22:30
#Star Wars-A Force Awakens 29-12-2015 22:30

vocTimeList = [x for x in cinestar2015Co2Df.loc[:]['Time']]
vocStarWarsAdjusted = copy.deepcopy(vocStarWars)

errorDates = ['22-12-2015 22:30', '28-12-2015 22:30', '29-12-2015 22:30']
for errorDate in errorDates:
    errorIndex = timeList.index(errorDate)
    index = timeListStarWars.index(errorDate)
    vocFrame = originalVOCFrames[errorIndex]
    if errorDate == '22-12-2015 22:30':
        endOfMovie = '23-12-2015 00:55'
        movieIndex = movieList.index('Star Wars-The Force Awakens')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocStarWarsAdjusted[index] = vocWindow
    elif errorDate == '28-12-2015 22:30':
        endOfMovie = '29-12-2015 00:59'
        movieIndex = movieList.index('Star Wars-The Force Awakens')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocStarWarsAdjusted[index] = vocWindow
    elif errorDate == '29-12-2015 22:30':
        endOfMovie = '30-12-2015 01:15'
        movieIndex = movieList.index('Star Wars-The Force Awakens')
        effectiveRuntime = movieRuntimeDf.loc[movieIndex]['effective runtime']
        vocEndIndex = list(vocFrame[:]['Time'].values).index(endOfMovie)
        vocStartIndex = vocEndIndex - effectiveRuntime
        vocWindow = vocFrame[vocStartIndex:vocEndIndex][:]
        vocStarWarsAdjusted[index] = vocWindow

## Create Datasets

In [81]:
#Assign vocs to corresponding movie lists

def vocAssignment(matchedMovieList, normalisedVOCList):
    hobbitVOCList = list()
    walterVOCList = list()
    buddyVOCList = list()
    macheteVOCList = list()
    paranormalActivityVOCList = list()
    hungergamesVOCList = list()

    for movieIndex in range(0,len(matchedMovieList)):
        movie = matchedMovieList[movieIndex]
        vocFrame = normalisedVOCList[movieIndex]
        if movie == 'Hobbit 2':
            hobbitVOCList.append(vocFrame)
        elif movie == 'Buddy':
            buddyVOCList.append(vocFrame)
        elif movie == 'Walter Mitty':
            walterVOCList.append(vocFrame)
        elif movie == 'Paranormal Activity':
            paranormalActivityVOCList.append(vocFrame)
        elif movie == 'The Hunger Games-Catching Fire':
            hungergamesVOCList.append(vocFrame)
        elif movie == 'Machete Kills':
            macheteVOCList.append(vocFrame)

    #add to a dictionary 
    movieVOC = dict()
    movieVOC['Hobbit 2'] = hobbitVOCList
    movieVOC['Buddy'] = buddyVOCList
    movieVOC['Walter Mitty'] = walterVOCList
    movieVOC['Paranormal Activity'] = paranormalActivityVOCList
    movieVOC['The Hunger Games-Catching Fire'] = hungergamesVOCList
    movieVOC['Machete Kills'] = macheteVOCList
    
    return movieVOC

In [82]:
#save adjusted voc frames, prelim aligned voc frames and delta aligned voc frame

deltaAlignedVOC = dict()
adjustedVOC = dict()
#assign voc frames to each movie
adjustedVOC = vocAssignment(matchedMovieList, adjustedVOCList) #manually adjusted
deltaAlignedVOC = vocAssignment(matchedMovieList, vocList)     #delta aligned

#concatenate 2013 and 2015 lists


#add 2015 movies to the adjusted and delta aligned data sets
adjustedVOC['I\'m Off Then'] = vocImOffThenAdjusted
adjustedVOC['Help, I\'ve Shrunk My Teacher'] = vocHelpIShrunkAdjusted
adjustedVOC['Star Wars-The Force Awakens'] = vocStarWarsAdjusted

deltaAlignedVOC['I\'m Off Then'] = vocImOffThen
deltaAlignedVOC['Help, I\'ve Shrunk My Teacher'] = vocHelpIShrunk
deltaAlignedVOC['Star Wars-The Force Awakens'] = vocStarWars

In [83]:
#normalise voc frames and then save them 
#normalise the dataframes
normalisedVOCList = list()
normalisedVOC = dict()

for movie in movieList:
    screenings = adjustedVOC[movie]
    normalisedVOCList = list()
    for screeningNumber in range(0, len(screenings)):
        screening = screenings[screeningNumber]
        normalisedVOCFrame = screening[:]['CO2'].values/max(screening[:]['CO2'].values)
        normalisedScreening = screening[:][:]
        normalisedScreening[:]['CO2'] = normalisedVOCFrame
        normalisedVOCList.append(normalisedScreening)
    normalisedVOC[movie] = normalisedVOCList
    
#save manually adjusted and delta aligned datasets
pickle.dump(normalisedVOC, open( "adjustedVOCs.p", "wb" ) ) #delta aligned and manually adjusted 

## Data Augmentation - DTW & Distance Checks

Dynamic time warp and distance check the adjusted VOC frames

Only keep the portions of the voc graphs with close matchings

In [None]:
def DistanceAugmentation(modifiedBaseScreening, plot2):
    #plot1 is the base plot
    #plot 2 is the compared plot
    modifiedScreening = modifiedBaseScreening[:][:]
    modifiedPlot2 = list()
    distanceThreshold = 0.01
    plot1 = modifiedBaseScreening['CO2'].values
    timing = modifiedBaseScreening['Time'].values
    timingList = list()
    for pointIndex in range(0, len(plot2)):
        distance = abs(plot1[pointIndex] - plot2[pointIndex])
        if distance <  distanceThreshold:
            modifiedPlot2.append(plot2[pointIndex])
            timingList.append(timing[pointIndex])
        else:
            modifiedPlot2.append(nan)
            timingList.append(timing[pointIndex])
    modifiedScreening[:]['CO2'] = modifiedPlot2
    return modifiedScreening

In [None]:
#perform dynamic time warping and use distance checks 
def MovieDTW(vocDictionary ,baselineVOC, movieList):
    
    dtwScreenings = dict()
    distanceScreenings = dict()
    for movie in movieList:
        movieDTWList = list()
        movieDistanceList = list()
        vocScreenings = vocDictionary[movie]
        baseMovie = (baselineVOC[movie]['CO2'].values).reshape(-1,1)
        for screening in vocScreenings:
            comparedMovie = (screening['CO2'].values).reshape(-1,1)
            euclidean_norm = lambda baseMovie, comparedMovie: np.abs(baseMovie - comparedMovie)
            dist, cost, acc, path = dtw(baseMovie, comparedMovie, dist=euclidean_norm)
            path1 = path[0]
            path2 = path[1]

            plot1 = baseMovie[path1]
            plot2 = comparedMovie[path2]
            print(len(baselineVOC[movie]['CO2'].values))
            print(len(plot1))
            print(len(plot2))
            print()
#             modifiedBaseScreening = (baselineVOC[movie])[:][:]
#             modifiedBaseScreening[:]['CO2'] = plot1
            
            
            movieDTWList.append(plot2)
#             modifiedPlot2 = DistanceAugmentation(modifiedBaseScreening, plot2)
#             movieDistanceList.append(modifiedPlot2)
       
        dtwScreenings[movie] = movieDTWList
        distanceScreenings[movie] = movieDistanceList
        
    return dtwScreenings, distanceScreenings

In [None]:
#baseline VOC plots

#method: look for baseline VOC curves 

#Hobbit 18-12-2013 14:00
#Machete 23-12-2013 19:35
#Buddy 25-12-2013 14:50
#Hunger Games 27-12-2013 13:15
#Walter 02-01-2014 20:15
#Paranormal Activity 09-01-2014 20:35
#Star Wars: The Force Awakens 26-12-2015 17:30
#I'm Off Then 26-12-2015 17:30
#Help, I've Shrunk The Teacher 30-12-2015 11:30


#2013 baseline
baselineTimeList = list()
baselineTimeList.append('10-01-2014 16:30')
baselineTimeList.append('23-12-2013 19:35')
baselineTimeList.append('25-12-2013 14:50')
baselineTimeList.append('27-12-2013 13:15')
baselineTimeList.append('02-01-2014 20:15')
baselineTimeList.append('09-01-2014 20:35')
baselineVOC = dict()
for baselineTime in baselineTimeList:
    timeIndex = timeList.index(baselineTime)
    matchedMovie = matchedMovieList[timeIndex]
    vocFrame = adjustedVOCList[timeIndex]
    baselineVOC[matchedMovie]= vocFrame

#2015 baseline 
#Star Wars Baseline 
starWarsBaseline = '26-12-2015 17:30'
starWarsIndex = timeListStarWars.index(starWarsBaseline)
vocFrame = (normalisedVOC['Star Wars: The Force Awakens'])[starWarsIndex]
baselineVOC['Star Wars: The Force Awakens']= vocFrame

imOffThenBaseline = '26-12-2015 17:30'
imOffThenIndex = timeListImOffThen.index(imOffThenBaseline)
vocFrame = (normalisedVOC['I\'m Off Then'])[imOffThenIndex]
baselineVOC['I\'m Off Then']= vocFrame

helpIShrunkBaseline = '30-12-2015 11:30'
helpIShrunkIndex = timeListHelpIShrunk.index(helpIShrunkBaseline)
vocFrame = (normalisedVOC['Help, I\'ve Shrunk My Teacher'])[helpIShrunkIndex]
baselineVOC['Help, I\'ve Shrunk My Teacher']= vocFrame

In [None]:
#movie data warping
movieDTWVOC = dict()
movieDistanceVOC = dict()

movieDTWVOC, movieDistanceVOC =  MovieDTW(normalisedVOC ,baselineVOC, movieList)

In [None]:
#if movie length is not the same as the baseline then DTW has failed in places must remove those places 

In [172]:
#save required data

<h3>Windowing</h3>

<b>Pseudo-code</b>

PREPROCESS

- take use the start and end time to get the global time index
- then add 2.5 mins to the start and end of the movie

WINDOWING

- use the actual start of the movie as the centre point of the window
- add 2.5 mins (5 intervals) previous to the centre point and 2.5 mins (5 intervals) after the centre point
- save that as coloumns in a dataframe 

Final dataframe should contain the window in line with the various film features

In [84]:
def windowing(screening, vocTimeList, co2Df):
    
    #add interval to start and end of window
    startTime = list(screening['Time'])[0]
    startTimeIndex = vocTimeList.index(startTime) - 5
    endTimeIndex = startTimeIndex + len(screening) + 9
    windowDf = co2Df.loc[startTimeIndex:endTimeIndex][:]
    
    #windowing process
    actualStartIndex = vocTimeList.index(startTime)
    actualEndIndex = actualStartIndex + len(screening) - 1
    
    windowedList = list()
    for i in range(0, len(screening)):
        window = windowDf.loc[actualStartIndex:actualStartIndex+9][:]
        windowedList.append(window)
        actualStartIndex = actualStartIndex + 1
    return windowedList

In [85]:
windowedNormalisedMovieScreening = dict()
for movie in movieList:
    normalisedMovieScreening = normalisedVOC[movie]
    windowedScreening = list()
    for screening in normalisedMovieScreening:
        #find year of movie and then to figure out what VOC dataset to give it
        year = list(screening['Time'])[0][6:10]
        if year == '2013' or year == '2014':
            vocTimeList =list(co2Df['Time'])
            windowDf = windowing(screening, vocTimeList, co2Df)
        elif year == '2015' or year == '2016':
            vocTimeList = list(cinestar2015Co2Df['Time'])
            windowDf = windowing(screening, vocTimeList, cinestar2015Co2Df)
        windowedScreening.append(windowDf)
    windowedNormalisedMovieScreening[movie] = windowedScreening 

## Train Test Split

- Randomly remove a screening from each movie to create the training and test dataset
- Randomly remove rows from the entire (concatenated) dataset to create a train/test set

In [19]:
#train/test split - by screening 
#randomly select and remove one screening

hungergamesTestIndex = random.randint(0, len(hungergamesDTWVOCList)-1)
hungergamesTest = hungergamesDTWVOCList[hungergamesTestIndex]
hungergamesTrain = hungergamesDTWVOCList[:]
hungergamesTrain.pop(hungergamesTestIndex)

array([[0.83427036],
       [0.83864006],
       [0.83623673],
       [0.83621386],
       [0.83942254],
       [0.84142828],
       [0.84373126],
       [0.84689421],
       [0.84895203],
       [0.85125883],
       [0.85308673],
       [0.85505437],
       [0.85828337],
       [0.85957777],
       [0.85982547],
       [0.86096235],
       [0.86259972],
       [0.86474773],
       [0.86776587],
       [0.86850516],
       [0.86946547],
       [0.87019714],
       [0.87363701],
       [0.878793  ],
       [0.87960724],
       [0.88027539],
       [0.88437579],
       [0.88656445],
       [0.88968802],
       [0.89384686],
       [0.89407804],
       [0.89424699],
       [0.89535212],
       [0.89845536],
       [0.90066816],
       [0.90028581],
       [0.90137061],
       [0.90398227],
       [0.90634749],
       [0.90958666],
       [0.91176007],
       [0.91290839],
       [0.91148062],
       [0.91134343],
       [0.91100808],
       [0.91310655],
       [0.91373533],
       [0.914