In [65]:
# This is a script for finding the median Velocity Magnitude in both regions of the prep beat!
# We define the first region as the point starting from the first instance when velocity_Y is positive, 
# until the max value in Position_Y
# The second region is defined as everything after region one that is within half of the total Y distance from the highest point in region 1
# The output will be a table with only five features: Trial, BPM, GestureSize, MedianVelocityRegionOne and MedianVelocityRegionTwo
# Much like in the tutorial document, the first step is to import all of the libraries we're going to need. 

import pandas as pd
import numpy as np

In [66]:
# First we grab the data csv file from wherever it's saved. If your script is in the same directory as the csv, the following line will do
data = pd.read_csv(r'CleanData.csv');

In [67]:
# This gives us an array of 1 through 40 
trials = range(1,41)
bpm = [80,100,120]
gestureSize = ["S","M","L"]
# We initialize these array because we're going to be iterating over them in a bit 

In [68]:
# These lines let Python know that we want to treat the data as numbers, not strings
# This will be useful when we're splicing the data set and comparing values
data.DistanceCoveredSoFar = data.DistanceCoveredSoFar.astype(float)
data.VelocityMagnitude = data.VelocityMagnitude.astype(float)
data.AngleToBP1 = data.AngleToBP1.astype(float)
data.BPM = data.BPM.astype(int)
data.Position_Y = data.Position_Y.astype(float)

In [69]:
# Here we define how we want our final table to look like. 
# We name all our features
newOutputTable = pd.DataFrame({'Trial':[],'BPM':[],'GestureSize':[],'MedianVelocityRegionOne':[],'MedianVelocityRegionTwo':[]})

In [102]:
# Here's where we iterate over the arrays we mentioned earlier
for g in gestureSize:
    for b in bpm:
        for i in trials:
            # First we slice, by only getting the ith trial
            sliceByTrial = data.loc[data.Trial == i]
            #Then we slice by getting only gesture g, of trial i
            sliceByGesture = sliceByTrial.loc[sliceByTrial.GestureSize == g]
            #Then we slice by getting only bpm b, of gesture g and trial i
            sliceByBPM = sliceByGesture.loc[sliceByGesture.BPM == b]
            
            # =======================================
            # ============ REGION 1 =================
            # =======================================
            
            # First we need to find the index where the position is highest
            # This will correspond to the moment of the prep beat, right before the user moves downwards towards the base plane
            # idxmax() is a built in python function that grabs the index of the max value of a given array
            globalHighestPointIndex = sliceByBPM.Position_Y.idxmax()
            
            # The next operation exists because 'globalHighestPointIndex' will give us the index of the highest position, in terms of the indexing of the entire dataset
            # So although our now spliced data stored in 'sliceByBPM' has only about 40 data points, we might be getting an index of 6000 or something crazy.
            # The reason this is cumbersome is because, a few lines below, we're going to be slicing this dataset from 0 to our 'globalHighestPointIndex'
            # Getting the data points from 0 to 6000 doesn't make sense for a 40 data point dataset. So we want a 'local' 'HighestPointIndex'
            
            # We subtract 'globalHighestPointIndex' by the index of the very first data point in 'sliceByBPM'.
            # This gives us the number of items between highestPointIndex and the beginning of the gesture. 
            # We then add 1 python is not inclusive for the second index in a slice [x:y]
            # So to actually include the data point at y we need [x : y+1]
            localHighestPointIndex = globalHighestPointIndex - sliceByBPM.index[0] + 1
            
            # Now we need to get a similar index for the first position where Velocity_Y is positive
            # To start we slice the data to get all the points where Velocity_Y is positive
            sliceToGetPositiveYVelocity = sliceByBPM.loc[sliceByBPM.Velocity_Y > 0]
            
            # Then we get the index of the very first data point sliced
            globalIndexFirstPositiveYVelocity = sliceToGetPositiveYVelocity.index[0]
            # We convert to local and now we have the index we need.
            # We don't need to add 1 in this case because python is inclusive on the first index in a slice [x:y]
            localIndexFirstPositiveYVelocity = globalIndexFirstPositiveYVelocity - sliceByBPM.index[0]
            
            # Now we have our region One data
            RegionOne = sliceByBPM[localIndexFirstPositiveYVelocity:localHighestPointIndex+1]
            
            
            # =======================================
            # ============ REGION 2 =================
            # =======================================
            
            # Now we get the smallest Position_Y of region 1
            # We can get it by either getting the first index or actually using the min function!
            smallestYPoint_RegionOneIndex = RegionOne.Position_Y.idxmin()
            
            # We get the highest Y position with our globalHighestPointIndex
            y_positionHighestPoint = sliceByBPM.Position_Y[globalHighestPointIndex]
            # And our lowest y position with our smallest y point in region 1
            y_positionLowestPoint  = sliceByBPM.Position_Y[smallestYPoint_RegionOneIndex]
            
            # We get the absolute y distance between the lowest and highest points
            y_distance = np.absolute(y_positionHighestPoint - y_positionLowestPoint)
            # Then we take half of that distance
            y_distanceWithinRegion2 = y_distance/2
            
            # Now let's get everything NOT in Region 1
            portionAfterRegionOne = sliceByBPM[localHighestPointIndex+1:]
            
            # From everything NOT in Region 1 we grab every point that is within y_distanceWithinRegion2 of the highest Y position in Region 1
            # And like that we have Region Two 
            RegionTwo = portionAfterRegionOne[np.absolute(portionAfterRegionOne.Position_Y - y_positionHighestPoint) < y_distanceWithinRegion2]
            
            # =======================================
            # ============== Median =================
            # =======================================
            
            currentMedianVelocityRegionTwo = np.median(RegionTwo.VelocityMagnitude)
            currentMedianVelocityRegionOne = np.median(RegionOne.VelocityMagnitude)
            
            newOutputTable = newOutputTable.append({'Trial': i,'BPM':b,'GestureSize':g,'MedianVelocityRegionOne':currentMedianVelocityRegionOne,'MedianVelocityRegionTwo':currentMedianVelocityRegionTwo}, ignore_index = True)
            

In [81]:
newOutputTable

Unnamed: 0,Trial,BPM,GestureSize,MedianVelocityRegionOne,MedianVelocityRegionTwo
0,1.0,80.0,S,1.080500,0.607110
1,2.0,80.0,S,0.959128,0.566222
2,3.0,80.0,S,0.978255,0.721222
3,4.0,80.0,S,1.053581,0.610820
4,5.0,80.0,S,1.100675,0.616106
5,6.0,80.0,S,0.988700,0.646852
6,7.0,80.0,S,1.039103,0.686721
7,8.0,80.0,S,1.079486,0.772858
8,9.0,80.0,S,1.277930,0.800554
9,10.0,80.0,S,1.179397,0.711218


In [72]:
np.mean(newOutputTable[newOutputTable.GestureSize == "S"])

Trial                       20.500000
BPM                        100.000000
MedianVelocityRegionOne      0.997037
MedianVelocityRegionTwo      0.591471
dtype: float64

In [73]:
np.mean(newOutputTable[newOutputTable.GestureSize == "M"])

Trial                       20.500000
BPM                        100.000000
MedianVelocityRegionOne      1.411732
MedianVelocityRegionTwo      0.819753
dtype: float64

In [74]:
np.mean(newOutputTable[newOutputTable.GestureSize == "L"])

Trial                       20.500000
BPM                        100.000000
MedianVelocityRegionOne      1.934169
MedianVelocityRegionTwo      0.990695
dtype: float64

In [None]:
# These last three lines are just for fun. We can see that the median velocity definitely increases with size! 
# Now let's go get more data! 

In [None]:
# When you're done with your data set, save it as a csv and push it up to develop. 
# Once we have all of the data we need, we'll combine it all into one table and start messing around with the values. 
newOutputTable.to_csv(r'MedianVelocityRegionTwo.csv')