In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
data = pd.read_csv(r'CleanData.csv');

In [None]:
trials = range(1,41)
bpm = [80,100,120]
gestureSize = ["S","M","L"]

In [None]:
data.DistanceCoveredSoFar = data.DistanceCoveredSoFar.astype(float)
data.VelocityMagnitude = data.VelocityMagnitude.astype(float)
data.AngleToBP1 = data.AngleToBP1.astype(float)
data.BPM = data.BPM.astype(int)
data.Position_Y = data.Position_Y.astype(float)

In [None]:
newOutputTable = pd.DataFrame({'Trial':[],'BPM':[],'GestureSize':[],'MedianVelocityRegionOne':[]})

In [None]:
newCleanData = pd.DataFrame({'ID':[],'GestureSize':[],'Velocity_X':[],'Velocity_Y':[],'Velocity_Z':[],'Position_X':[],'Position_Y':[],'Position_z':[],'VelocityMagnitude':[],'TimeRelativeToPrep':[],'AngleToBP1':[],'DistanceCoveredSoFar':[],'Acceleration':[],'BPM':[],'Trial':[]})

In [389]:
for g in gestureSize:
    for b in bpm:
        for i in trials:
            # First we slice, by only getting the ith trial
            sliceByTrial = data.loc[data.Trial == i]
            #Then we slice by getting only gesture g, of trial i
            sliceByGesture = sliceByTrial.loc[sliceByTrial.GestureSize == g]
            #Then we slice by getting only bpm b, of gesture g and trial i
            sliceByBPM = sliceByGesture.loc[sliceByGesture.BPM == b]
            
            # =======================================
            # ============ REGION 1 =================
            # =======================================
            
            # First we need to find the index where the position is highest
            # This will correspond to the moment of the prep beat, right before the user moves downwards towards the base plane
            # idxmax() is a built in python function that grabs the index of the max value of a given array
            globalHighestPointIndex = sliceByBPM.Position_Y.idxmax()
            
            # The next operation exists because 'globalHighestPointIndex' will give us the index of the highest position, in terms of the indexing of the entire dataset
            # So although our now spliced data stored in 'sliceByBPM' has only about 40 data points, we might be getting an index of 6000 or something crazy.
            # The reason this is cumbersome is because, a few lines below, we're going to be slicing this dataset from 0 to our 'globalHighestPointIndex'
            # Getting the data points from 0 to 6000 doesn't make sense for a 40 data point dataset. So we want a 'local' 'HighestPointIndex'
            
            # We subtract 'globalHighestPointIndex' by the index of the very first data point in 'sliceByBPM'.
            # This gives us the number of items between highestPointIndex and the beginning of the gesture. 
            # We then add 1 python is not inclusive for the second index in a slice [x:y]
            # So to actually include the data point at y we need [x : y+1]
            localHighestPointIndex = globalHighestPointIndex - sliceByBPM.index[0] + 1
            
            # Now we need to get a similar index for the first position where Velocity_Y is positive
            # To start we slice the data to get all the points where Velocity_Y is positive
            sliceToGetPositiveYVelocity = sliceByBPM.loc[sliceByBPM.Velocity_Y > 0]
            
            # Then we get the index of the very first data point sliced
            globalIndexFirstPositiveYVelocity = sliceToGetPositiveYVelocity.index[0]
            # We convert to local and now we have the index we need.
            # We don't need to add 1 in this case because python is inclusive on the first index in a slice [x:y]
            localIndexFirstPositiveYVelocity = globalIndexFirstPositiveYVelocity - sliceByBPM.index[0]
            
            # Now we have our region One data
            RegionOne = sliceByBPM[localIndexFirstPositiveYVelocity:localHighestPointIndex+1]
            
            
            # =======================================
            # ============ REGION 2 =================
            # =======================================
            
            # Now we get the smallest Position_Y of region 1
            # We can get it by either getting the first index or actually using the min function!
            smallestYPoint_RegionOneIndex = RegionOne.Position_Y.idxmin()
            
            # We get the highest Y position with our globalHighestPointIndex
            y_positionHighestPoint = sliceByBPM.Position_Y[globalHighestPointIndex]
            # And our lowest y position with our smallest y point in region 1
            y_positionLowestPoint  = sliceByBPM.Position_Y[smallestYPoint_RegionOneIndex]
            
            # We get the absolute y distance between the lowest and highest points
            y_distance = np.absolute(y_positionHighestPoint - y_positionLowestPoint)
            # Then we take half of that distance
            y_distanceWithinRegion2 = y_distance/2
            
            # Now let's get everything NOT in Region 1
            portionAfterRegionOne = sliceByBPM[localHighestPointIndex+1:]
            
            # From everything NOT in Region 1 we grab every point that is within y_distanceWithinRegion2 of the highest Y position in Region 1
            # And like that we have Region Two 
            RegionTwo = portionAfterRegionOne[np.absolute(portionAfterRegionOne.Position_Y - y_positionHighestPoint) < y_distanceWithinRegion2]
            
            # ============================================
            # ============ Interpolating =================
            # ============================================
            
            # What to do next
            # Need to get the index o the first point that crosses the plane
            # Save the index of that point
            # Grab a chunk of the entire data set up until that point, and save
            # the chunk that's after that point
            # Add the point
            # reform the data
            # Interpolate
            # no need to check the bounds as I know for sure that the final point
            # doesnt necessarily correspond to lowest Position _Y
            # Maybe we don't need to add an entirely new point. Maybe I could just get the time difference and call it a day 
                        
            # First we grab the index of the lowest point in regionOne (base plane)
            indexOfLowestYPosition = RegionOne.Position_Y.idxmin()
            # And the index of the last position
            indexOfLastPosition = sliceByBPM.DistanceCoveredSoFar.idxmax()
        
            # Now we have the yposition of the base plane, the last point in the set, and the penultimate point
            Position_Y_AtBasePlane     = RegionOne.Position_Y.loc[indexOfLowestYPosition]
            Position_Y_BeforeBasePlane = sliceByBPM.Position_Y.loc[indexOfLastPosition-1]
            Position_Y_AfterBasePlane  = sliceByBPM.Position_Y.loc[indexOfLastPosition]              
            
            # Here we grab all points after the base plane located below the base plane y position
            pointsBelowPlane = portionAfterRegionOne.loc[portionAfterRegionOne.Position_Y < Position_Y_AtBasePlane].iloc[0]
            
            pointPastPlane   = pointsBelowPlane.iloc[0]
            # Here we grab all the point above the base plane and after region one 
            pointsBeforePlane = portionAfterRegionOne.loc[portionAfterRegionOne.Position_Y > Position_Y_AtBasePlane]
            
            # Now we grab all of the data, except for the very last point
            firstChunkOfData = sliceByBPM[0:indexRightBeforePlane - sliceByBPM.index[0] + 1]
            
            # We add a new point with Y position of the base plane
            IDOfNewPoint = pointPastPlane.loc['ID'] + "Interp"
            newPointToBeInterpolated = pd.DataFrame({'Position_Y' : [Position_Y_AtBasePlane],
                                                     'GestureSize': [pointPastPlane.loc['GestureSize'][0]],
                                                     'ID': IDOfNewPoint})
            # We add that point to our first Chunk
            firstChunkOfData = firstChunkOfData.append(newPointToBeInterpolated,ignore_index = True)
            # And add the last point of the originla data set
            finalChunkOfData = firstChunkOfData.append(pointPastPlane,ignore_index = True)
            
            # Now we interpolate the missing values of the 'newPointToBeInterpolated'
            finalChunkOfData = finalChunkOfData.interpolate(method ='linear', limit_direction ='forward') 
            
            
            #if (pointPastPlane.loc['ID'] != sliceByBPM.iloc[sliceByBPM.Trial.size-1]['ID']):
            #    print("This condition checks that the point past the plane is also the last point in the sliceByBPM data set")            
            #if ((pointPastPlane.loc['Unnamed: 0'].astype(int) - pointBeforePlane.loc['Unnamed: 0'].astype(int))>1):
            #    print(pointPastPlane.loc['Unnamed: 0'].astype(int))
            #    print(pointBeforePlane.loc['Unnamed: 0'].astype(int))
            #    print("This checks to make sure that point before plane is right before the last point in the set")
            # We update our clean data with this new data point
            newCleanData = newCleanData.append(finalChunkOfData,ignore_index = True)
                
            
            

AttributeError: 'numpy.int64' object has no attribute 'loc'

In [None]:
newCleanData = newCleanData.loc[:, ~newCleanData.columns.str.contains('^Unnamed')]
newCleanData

In [None]:
newOutputTable.to_csv(r'MedianVelocityRegionOne.csv')