In [2]:
# This is a script for finding the median Velocity Magnitude in both regions of the prep beat!
# We define the first region as the point starting from the first instance when velocity_Y is positive, 
# until the max value in Position_Y
# The second region is defined as everything after region one that is within half of the total Y distance from the highest point in region 1
# The output will be a table with only five features: Trial, BPM, GestureSize, MedianVelocityRegionOne and MedianVelocityRegionTwo
# Much like in the tutorial document, the first step is to import all of the libraries we're going to need. 

import pandas as pd
import numpy as np

In [3]:
# First we grab the data csv file from wherever it's saved. If your script is in the same directory as the csv, the following line will do
data = pd.read_csv(r'CleanDataUpdated.csv');

In [4]:
# This gives us an array of 1 through 40 
trials = range(1,41)
bpm = [80,100,120]
gestureSize = ["S","M","L"]
# We initialize these array because we're going to be iterating over them in a bit 

In [5]:
# These lines let Python know that we want to treat the data as numbers, not strings
# This will be useful when we're splicing the data set and comparing values
data.DistanceCoveredSoFar = data.DistanceCoveredSoFar.astype(float)
data.VelocityMagnitude = data.VelocityMagnitude.astype(float)
data.AngleToBP1 = data.AngleToBP1.astype(float)
data.BPM = data.BPM.astype(int)
data.Position_Y = data.Position_Y.astype(float)

In [6]:
# Here we define how we want our final table to look like. 
# We name all our features
newOutputTable = pd.DataFrame({'Trial':[],'BPM':[],'GestureSize':[],'TimeDiffRegionOneToPlane':[],'TimeDiffRegionTwoToPlane':[],'TimeEndRegionOne':[],'TimeEndRegionTwo':[],'TimeStartRegionOne':[],'TimeStartRegionTwo':[],'TimeDifferenceBetweenCollisions':[]})

In [43]:
for g in gestureSize:
    for b in bpm:
        for i in trials:
            # First we slice, by only getting the ith trial
            sliceByTrial = data.loc[data.Trial == i]
            #print(sliceByTrial.Trial.size)
            #Then we slice by getting only gesture g, of trial i
            sliceByGesture = sliceByTrial.loc[sliceByTrial.GestureSize == g]
            #print(sliceByGesture.Trial.size)
            #Then we slice by getting only bpm b, of gesture g and trial i
            sliceByBPM = sliceByGesture.loc[sliceByGesture.BPM == b]
            #print(sliceByBPM.Trial.size)
            
            if(sliceByBPM.Trial.size == 0):
                continue
            # =======================================
            # ============ REGION 1 =================
            # =======================================
            
            # First we need to find the index where the position is highest
            # This will correspond to the moment of the prep beat, right before the user moves downwards towards the base plane
            # idxmax() is a built in python function that grabs the index of the max value of a given array
            globalHighestPointIndex = sliceByBPM.Position_Y.idxmax()
            
            # The next operation exists because 'globalHighestPointIndex' will give us the index of the highest position, in terms of the indexing of the entire dataset
            # So although our now spliced data stored in 'sliceByBPM' has only about 40 data points, we might be getting an index of 6000 or something crazy.
            # The reason this is cumbersome is because, a few lines below, we're going to be slicing this dataset from 0 to our 'globalHighestPointIndex'
            # Getting the data points from 0 to 6000 doesn't make sense for a 40 data point dataset. So we want a 'local' 'HighestPointIndex'
            
            # We subtract 'globalHighestPointIndex' by the index of the very first data point in 'sliceByBPM'.
            # This gives us the number of items between highestPointIndex and the beginning of the gesture. 
            # We then add 1 python is not inclusive for the second index in a slice [x:y]
            # So to actually include the data point at y we need [x : y+1]
            localHighestPointIndex = globalHighestPointIndex - sliceByBPM.index[0] + 1
            
            # Now we need to get a similar index for the first position where Velocity_Y is positive
            # To start we slice the data to get all the points where Velocity_Y is positive
            sliceToGetPositiveYVelocity = sliceByBPM.loc[sliceByBPM.Velocity_Y > 0]
            
            # Then we get the index of the very first data point sliced
            globalIndexFirstPositiveYVelocity = sliceToGetPositiveYVelocity.index[0]
            # We convert to local and now we have the index we need.
            # We don't need to add 1 in this case because python is inclusive on the first index in a slice [x:y]
            localIndexFirstPositiveYVelocity = globalIndexFirstPositiveYVelocity - sliceByBPM.index[0]
            
            # Now we have our region One data
            RegionOne = sliceByBPM[localIndexFirstPositiveYVelocity:localHighestPointIndex+1]
            
           
            # =======================================
            # ============ REGION 2 =================
            # =======================================
            
            # Now we get the smallest Position_Y of region 1
            # We can get it by either getting the first index or actually using the min function!
            smallestYPoint_RegionOneIndex = RegionOne.Position_Y.idxmin()
            
            # We get the highest Y position with our globalHighestPointIndex
            y_positionHighestPoint = sliceByBPM.Position_Y[globalHighestPointIndex]
            # And our lowest y position with our smallest y point in region 1
            y_positionLowestPoint  = sliceByBPM.Position_Y[smallestYPoint_RegionOneIndex]
            
            # We get the absolute y distance between the lowest and highest points
            y_distance = np.absolute(y_positionHighestPoint - y_positionLowestPoint)
            # Then we take half of that distance
            y_distanceWithinRegion2 = y_distance/2
            
            # Now let's get everything NOT in Region 1
            portionAfterRegionOne = sliceByBPM[localHighestPointIndex+1:]
            
            # From everything NOT in Region 1 we grab every point that is within y_distanceWithinRegion2 of the highest Y position in Region 1
            # And like that we have Region Two 
            RegionTwo = portionAfterRegionOne[np.absolute(portionAfterRegionOne.Position_Y - y_positionHighestPoint) < y_distanceWithinRegion2]
            
            # =======================================
            # ============== Time Difference =================
            # =======================================
            
            interpString = g + str(b) + str(i) + "Interp"
            
            lastPointInRegionOne = RegionOne.iloc[RegionOne.Trial.size-1]['TimeRelativeToPrep']
            lastPointInRegionTwo = RegionTwo.iloc[RegionTwo.Trial.size-1]['TimeRelativeToPrep']
            #print(RegionOne.Trial.size-1)
            
            firstPointInRegionOne = RegionOne.iloc[0]['TimeRelativeToPrep']
            firstPointInRegionTwo = RegionTwo.iloc[0]['TimeRelativeToPrep']
            
            pointOfSecondCollision = sliceByBPM.loc[sliceByBPM.ID == interpString]
            #print(firstPointInRegionOne)
           # print(lastPointInRegionTwo)
            
            TimeDifferenceRegionOne = pointOfSecondCollision.iloc[0]['TimeRelativeToPrep'] - lastPointInRegionOne
            TimeDifferenceRegionTwo = pointOfSecondCollision.iloc[0]['TimeRelativeToPrep'] - lastPointInRegionTwo
            
            BeforeRegionOne = sliceByBPM[0:localIndexFirstPositiveYVelocity]
            #print(RegionOne.iloc[0])
            regionAfterPlane = BeforeRegionOne.loc[BeforeRegionOne.Position_Y < RegionOne.iloc[0].Position_Y]
            # Time difference between collissions is now going to be between the time of second collisions, and the initial collission with the first plane
            # The plane doenst exist however until you exit. So we take every conductor sample that's under the plane, but before it's spawned
            # And we consider the first of those times (minus 0.01) as the point of collision with the plane
            TimeDifferenceBetweenCollisions = pointOfSecondCollision.iloc[0]['TimeRelativeToPrep'] - regionAfterPlane.iloc[0].TimeRelativeToPrep-0.01
            
            newOutputTable = newOutputTable.append({'Trial': i,'BPM':b,'GestureSize':g,'TimeDiffRegionOneToPlane':TimeDifferenceRegionOne,'TimeDiffRegionTwoToPlane':TimeDifferenceRegionTwo, 'TimeEndRegionOne':lastPointInRegionOne, 'TimeEndRegionTwo':lastPointInRegionTwo, 'TimeStartRegionOne':firstPointInRegionOne, 'TimeStartRegionTwo': firstPointInRegionTwo, 'TimeDifferenceBetweenCollisions': TimeDifferenceBetweenCollisions}, ignore_index = True)
            

In [44]:
newOutputTable

Unnamed: 0,Trial,BPM,GestureSize,TimeDiffRegionOneToPlane,TimeDiffRegionTwoToPlane,TimeEndRegionOne,TimeEndRegionTwo,TimeStartRegionOne,TimeStartRegionTwo,TimeDifferenceBetweenCollisions
0,1.0,80.0,S,0.30,0.16,0.46,0.60,0.12,0.48,0.64
1,2.0,80.0,S,0.32,0.16,0.46,0.62,0.12,0.48,0.66
2,3.0,80.0,S,0.36,0.16,0.52,0.72,0.16,0.54,0.72
3,4.0,80.0,S,0.28,0.12,0.56,0.72,0.24,0.58,0.60
4,5.0,80.0,S,0.33,0.17,0.58,0.74,0.24,0.60,0.67
5,6.0,80.0,S,0.31,0.17,0.54,0.68,0.16,0.56,0.69
6,7.0,80.0,S,0.37,0.15,0.48,0.70,0.18,0.54,0.67
7,8.0,80.0,S,0.35,0.13,0.44,0.66,0.16,0.50,0.63
8,9.0,80.0,S,0.43,0.19,0.54,0.78,0.26,0.56,0.71
9,10.0,80.0,S,0.35,0.17,0.54,0.72,0.20,0.56,0.69


In [11]:
np.mean(newOutputTable[newOutputTable.GestureSize == "S"])

Trial                              20.516949
BPM                                99.830508
TimeDiffRegionOneToPlane            0.254407
TimeDiffRegionTwoToPlane            0.112542
TimeEndRegionOne                    0.403898
TimeEndRegionTwo                    0.545763
TimeStartRegionOne                  0.130339
TimeStartRegionTwo                  0.433898
TimeDifferenceBetweenCollisions     0.527966
dtype: float64

In [12]:
np.mean(newOutputTable[newOutputTable.GestureSize == "M"])

Trial                               20.500000
BPM                                100.000000
TimeDiffRegionOneToPlane             0.263500
TimeDiffRegionTwoToPlane             0.107167
TimeEndRegionOne                     0.423833
TimeEndRegionTwo                     0.580167
TimeStartRegionOne                   0.149833
TimeStartRegionTwo                   0.451333
TimeDifferenceBetweenCollisions      0.537500
dtype: float64

In [10]:
np.mean(newOutputTable[newOutputTable.GestureSize == "L"])

Trial                        20.500000
BPM                         100.000000
TimeDiffRegionOneToPlane      0.275500
TimeDiffRegionTwoToPlane      0.101833
TimeEndRegionOne              0.451667
TimeEndRegionTwo              0.625333
TimeStartRegionOne            0.175833
TimeStartRegionTwo            0.478833
dtype: float64

In [11]:
# These last three lines are just for fun. We can see that the median velocity definitely increases with size! 
# Now let's go get more data! 

In [45]:
# When you're done with your data set, save it as a csv and push it up to develop. 
# Once we have all of the data we need, we'll combine it all into one table and start messing around with the values. 
newOutputTable.to_csv(r'TimeDifferenceregionOneAndTwoToBasePlane.csv')