In [1]:
# This is a script for finding the median Velocity Magnitude in the first region of the prep beat!
# We define the first region as everything leading up to the point of highest altitude, that is the max value in Position_Y
# The output will be a table with only four features: Trial, BPM, GestureSize, and MedianVelocityRegionOne
# Much like in the tutorial document, the first step is to import all of the libraries we're going to need. 


import pandas as pd
import numpy as np

In [2]:
# Load the cleaned data set. A bare file name works as long as this notebook
# lives in the same directory as the csv; otherwise put the full path here.
data = pd.read_csv('CleanData.csv')

In [3]:
# The experimental conditions we iterate over further down:
# trial numbers 1 through 40, the three tempos, and the three gesture sizes.
trials = range(1, 41)
bpm = [80, 100, 120]
gestureSize = ["S", "M", "L"]

In [4]:
# Make sure pandas treats these columns as numbers rather than strings.
# We rely on that when slicing the data set and comparing values below.
data = data.astype({
    'DistanceCoveredSoFar': float,
    'VelocityMagnitude': float,
    'AngleToBP1': float,
    'BPM': int,
    'Position_Y': float,
})

In [5]:
# Lay out the final table: one (initially empty) column per feature we report.
outputFeatures = ('Trial', 'BPM', 'GestureSize', 'MedianVelocityRegionOne')
newOutputTable = pd.DataFrame({feature: [] for feature in outputFeatures})

In [6]:
# Here's where we iterate over the arrays defined earlier and compute one median
# per (gesture size, bpm, trial) combination.
#
# Each result is collected as a dict in a plain Python list and the table is built
# once after the loop. DataFrame.append was removed in pandas 2.0, and growing a
# DataFrame row by row copies the whole table on every iteration.
# NOTE: because the table is built from the records directly, Trial and BPM come
# out as integers (1, 80) instead of the floats (1.0, 80.0) that appending to an
# empty float table used to produce.
regionOneRecords = []
for g in gestureSize:
    for b in bpm:
        for i in trials:
            # Keep only the rows belonging to trial i, gesture size g and tempo b
            sliceByBPM = data.loc[(data.Trial == i) & (data.GestureSize == g) & (data.BPM == b)]

            if sliceByBPM.empty or sliceByBPM.Position_Y.isna().all():
                # No usable samples for this combination: record a gap instead of crashing in idxmax()
                currentMedianVelocity = np.nan
            else:
                # idxmax() returns the index *label* (in terms of the entire dataset) of the
                # highest Position_Y, i.e. the moment of the prep beat right before the user
                # moves downwards towards the base plane.
                globalHighestPointIndex = sliceByBPM.Position_Y.idxmax()

                # .loc slices by label and includes the end label, so this selects every sample
                # from the start of the gesture up to and including the highest point. Unlike
                # converting the label to a position by subtracting the first label, this does
                # not require the rows of the slice to be contiguous in the original data.
                # Assumes the index labels are unique (true for the default read_csv index).
                regionOneVelocity = sliceByBPM.loc[:globalHighestPointIndex, 'VelocityMagnitude']

                # Median velocity magnitude over the first region
                currentMedianVelocity = np.median(regionOneVelocity.to_numpy())

            regionOneRecords.append({'Trial': i, 'BPM': b, 'GestureSize': g, 'MedianVelocityRegionOne': currentMedianVelocity})

# Build the final table in one go; `columns` fixes the column order
newOutputTable = pd.DataFrame(regionOneRecords, columns=['Trial', 'BPM', 'GestureSize', 'MedianVelocityRegionOne'])
            

In [7]:
# Now if we run this cell, we can see that we've created a new table, now with more condensed information
newOutputTable

Unnamed: 0,Trial,BPM,GestureSize,MedianVelocityRegionOne
0,1.0,80.0,S,0.752596
1,2.0,80.0,S,0.614580
2,3.0,80.0,S,0.795070
3,4.0,80.0,S,0.774642
4,5.0,80.0,S,0.740063
5,6.0,80.0,S,0.886231
6,7.0,80.0,S,0.924665
7,8.0,80.0,S,0.961053
8,9.0,80.0,S,0.863346
9,10.0,80.0,S,1.004309


In [8]:
# Column-wise mean over the small ("S") gestures.
# numeric_only=True leaves out the GestureSize string column; since pandas 2.0
# reductions no longer drop non-numeric columns on their own.
newOutputTable[newOutputTable.GestureSize == "S"].mean(numeric_only=True)

Trial                       20.50000
BPM                        100.00000
MedianVelocityRegionOne      0.87882
dtype: float64

In [9]:
# Column-wise mean over the medium ("M") gestures.
# numeric_only=True leaves out the GestureSize string column; since pandas 2.0
# reductions no longer drop non-numeric columns on their own.
newOutputTable[newOutputTable.GestureSize == "M"].mean(numeric_only=True)

Trial                       20.500000
BPM                        100.000000
MedianVelocityRegionOne      1.345007
dtype: float64

In [10]:
# Column-wise mean over the large ("L") gestures.
# numeric_only=True leaves out the GestureSize string column; since pandas 2.0
# reductions no longer drop non-numeric columns on their own.
newOutputTable[newOutputTable.GestureSize == "L"].mean(numeric_only=True)

Trial                       20.500000
BPM                        100.000000
MedianVelocityRegionOne      1.846911
dtype: float64

In [None]:
# These last three lines are just for fun. We can see that the median velocity definitely increases with size! 
# Now let's go get more data! 

In [None]:
# When you're done with your data set, save it as a csv and push it up to develop.
# Once we have all of the data we need, we'll combine it all into one table and start messing around with the values.
# index=False keeps the file to the four feature columns; otherwise pandas also writes the
# row index, which comes back as an extra "Unnamed: 0" column when the csv is read again.
newOutputTable.to_csv('MedianVelocityRegionOne.csv', index=False)