In [52]:
import pandas as pd
import numpy as np

In [53]:
data = pd.read_csv(r'CleanData.csv');

In [54]:

# This gives us an array of 1 through 40 
trials = range(1,41)
bpm = [80,100,120]
gestureSize = ["S","M","L"]
# We initialize these array because we're going to be iterating over them in a bit

In [55]:
# These lines let Python know that we want to treat the data as numbers, not strings
# This will be useful when we're splicing the data set and comparing values
data.DistanceCoveredSoFar = data.DistanceCoveredSoFar.astype(float)
data.VelocityMagnitude = data.VelocityMagnitude.astype(float)
data.AngleToBP1 = data.AngleToBP1.astype(float)
data.BPM = data.BPM.astype(int)
data.Position_Y = data.Position_Y.astype(float)

In [56]:
# Here we define how we want our final table to look like. 
# We name all our features
newOutputTable = pd.DataFrame({'Trial':[],'BPM':[],'GestureSize':[],'TotalDistanceCoveredInRegionOne':[], 'TotalDistanceCoveredInRegionTwo': [], 'TotalDistanceCoveredBothRegions': []})

In [57]:
 for g in gestureSize:
    for b in bpm:
        for i in trials:
            #g = 'S'
            #b = 80
            #i = 1
            sliceByTrial = data.loc[data.Trial == i]
            sliceByGesture = sliceByTrial.loc[sliceByTrial.GestureSize == g]
            sliceByBPM = sliceByGesture.loc[sliceByGesture.BPM == b]


            # =======================================
            # ============ REGION 1 =================
            # =======================================

            globalHighestPointIndex = sliceByBPM.Position_Y.idxmax()
            # currentTotalDistanceCovered = sliceByBPM[globalHighestPointIndex].DistanceCoveredSoFar
            localHighestPointIndex = globalHighestPointIndex - sliceByBPM.index[0]

            sliceToGetPositiveYVelocity = sliceByBPM.loc[sliceByBPM.Velocity_Y > 0]
            globalIndexFirstPositiveYVelocity = sliceToGetPositiveYVelocity.index[0]
            localIndexFirstPositiveYVelocity = globalIndexFirstPositiveYVelocity - sliceByBPM.index[0]

            RegionOne = sliceByBPM[localIndexFirstPositiveYVelocity:localHighestPointIndex+1]


            # =======================================
            # ============ REGION 2 =================
            # =======================================

            # Now we get the smallest Position_Y of region 1
            # We can get it by either getting the first index or actually using the min function!
            smallestYPoint_RegionOneIndex = RegionOne.Position_Y.idxmin()

            # We get the highest Y position with our globalHighestPointIndex
            y_positionHighestPoint = sliceByBPM.Position_Y[globalHighestPointIndex]
            # And our lowest y position with our smallest y point in region 1
            y_positionLowestPoint  = sliceByBPM.Position_Y[smallestYPoint_RegionOneIndex]

            # We get the absolute y distance between the lowest and highest points
            y_distance = np.absolute(y_positionHighestPoint - y_positionLowestPoint)
            # Then we take half of that distance
            y_distanceWithinRegion2 = y_distance/2

            # Now let's get everything NOT in Region 1
            portionAfterRegionOne = sliceByBPM[localHighestPointIndex+1:]

            # From everything NOT in Region 1 we grab every point that is within y_distanceWithinRegion2 of the highest Y position in Region 1
            # And like that we have Region Two 
            RegionTwo = portionAfterRegionOne[np.absolute(portionAfterRegionOne.Position_Y - y_positionHighestPoint) < y_distanceWithinRegion2]

            # =======================================
            # ====== Total Distance Covered ==========
            # =======================================

            distanceCoveredValuesOne = RegionOne["DistanceCoveredSoFar"] #need to get region one distance covered values only
            regionOneTotalDistanceCovered = max(distanceCoveredValuesOne) #gets the maximum value in the array containing DistanceCovered values in RegionOne
            distanceCoveredValuesBoth = RegionTwo["DistanceCoveredSoFar"]
            bothRegionsTotalDistanceCovered = max(distanceCoveredValuesBoth)
            regionTwoTotalDistanceCovered = bothRegionsTotalDistanceCovered - regionOneTotalDistanceCovered
            newOutputTable = newOutputTable.append({'Trial': i,'BPM':b,'GestureSize':g,'TotalDistanceCoveredInRegionOne':regionOneTotalDistanceCovered, 'TotalDistanceCoveredInRegionTwo': regionTwoTotalDistanceCovered, 'TotalDistanceCoveredBothRegions': bothRegionsTotalDistanceCovered}, ignore_index = True)


In [58]:
newOutputTable

Unnamed: 0,BPM,GestureSize,TotalDistanceCoveredInRegionOne,TotalDistanceCoveredInRegionTwo,Trial
0,80.0,S,0.3302,0.46734,1.0


In [59]:
np.mean(newOutputTable[newOutputTable.GestureSize == "S"])

BPM                                80.00000
TotalDistanceCoveredInRegionOne     0.33020
TotalDistanceCoveredInRegionTwo     0.46734
Trial                               1.00000
dtype: float64

In [60]:
np.mean(newOutputTable[newOutputTable.GestureSize == "M"])

BPM                               NaN
GestureSize                       NaN
TotalDistanceCoveredInRegionOne   NaN
TotalDistanceCoveredInRegionTwo   NaN
Trial                             NaN
dtype: float64

In [61]:
np.mean(newOutputTable[newOutputTable.GestureSize == "L"])

BPM                               NaN
GestureSize                       NaN
TotalDistanceCoveredInRegionOne   NaN
TotalDistanceCoveredInRegionTwo   NaN
Trial                             NaN
dtype: float64

In [62]:
# When you're done with your data set, save it as a csv and push it up to develop. 
# Once we have all of the data we need, we'll combine it all into one table and start messing around with the values. 
newOutputTable.to_csv(r'TotalDistanceCoveredBothRegions.csv')