In [4]:
from pathlib import Path
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

In [5]:
DATA_ROOT = Path('..') / 'data'

# Activity codes in the raw files are 1-based positions into this list.
activity_labels = ['bed', 'chair', 'lying', 'ambulating']
default_names = ['time', 'front', 'vertical', 'lateral', 'sensor_id', 'rssi', 'phase', 'frequency', 'activity']

# Explicit code -> label lookup. Unlike list indexing with ``i - 1``, an
# unexpected code (e.g. 0) becomes NaN instead of silently wrapping around
# to the last label.
activity_label_by_code = dict(enumerate(activity_labels, start=1))

dfs = []
# Files are matched by name (d[12]p??[FM]); the file name is used as the
# participant id and its last character as the gender label.
# sorted() makes the load order reproducible across file systems.
for data_file in sorted(DATA_ROOT.rglob('d[12]p??[FM]')):
    df = pd.read_csv(data_file, names=default_names)
    df['activity_label'] = df['activity'].map(activity_label_by_code)
    df['gender_label'] = data_file.name[-1]
    df['participant'] = data_file.name

    # Add a column indicating the order of the activities for a participant.
    # The counter increases by one each time the activity changes between
    # consecutive rows, so rows must be in time order first. A stable sort
    # keeps the file order of rows that share a timestamp; the default
    # quicksort could reorder them and create spurious activity changes.
    df = df.sort_values(by='time', kind='mergesort')
    df['activity_sequence'] = (df['activity'].shift(1) != df['activity']).cumsum()
    dfs.append(df)

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when the data directory is missing or empty.
if not dfs:
    raise FileNotFoundError(
        f"No recording files matching 'd[12]p??[FM]' found under {DATA_ROOT.resolve()}"
    )

sensor_df = pd.concat(dfs, axis='index').sort_values(by=['participant', 'time'])

sensor_df.head()

Unnamed: 0,time,front,vertical,lateral,sensor_id,rssi,phase,frequency,activity,activity_label,gender_label,participant,activity_sequence
0,0.0,0.27203,1.0082,-0.082102,1,-63.5,2.4252,924.25,1,bed,M,d1p01M,1
1,0.5,0.27203,1.0082,-0.082102,1,-63.0,4.7369,921.75,1,bed,M,d1p01M,1
2,1.5,0.44791,0.91636,-0.013684,1,-63.5,3.0311,923.75,1,bed,M,d1p01M,1
3,1.75,0.44791,0.91636,-0.013684,1,-63.0,2.0371,921.25,1,bed,M,d1p01M,1
4,2.5,0.34238,0.96229,-0.059296,1,-63.5,5.892,920.25,1,bed,M,d1p01M,1


### We need the length of each run of consecutive identical activities, for each participant

#### Let's start with a single participant first!

In [6]:
# Keep only the rows of one participant to prototype the per-participant analysis.
mini = sensor_df.loc[sensor_df['participant'].eq('d1p01M')]

In [7]:
# Display the single-participant slice (rich DataFrame output).
mini

Unnamed: 0,time,front,vertical,lateral,sensor_id,rssi,phase,frequency,activity,activity_label,gender_label,participant,activity_sequence
0,0.00,0.27203,1.00820,-0.082102,1,-63.5,2.42520,924.25,1,bed,M,d1p01M,1
1,0.50,0.27203,1.00820,-0.082102,1,-63.0,4.73690,921.75,1,bed,M,d1p01M,1
2,1.50,0.44791,0.91636,-0.013684,1,-63.5,3.03110,923.75,1,bed,M,d1p01M,1
3,1.75,0.44791,0.91636,-0.013684,1,-63.0,2.03710,921.25,1,bed,M,d1p01M,1
4,2.50,0.34238,0.96229,-0.059296,1,-63.5,5.89200,920.25,1,bed,M,d1p01M,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,251.00,0.87003,0.46851,0.009122,4,-58.5,0.73631,921.75,1,bed,M,d1p01M,9
397,251.25,0.87003,0.46851,0.009122,1,-57.5,0.70870,923.25,1,bed,M,d1p01M,9
398,251.50,0.87003,0.46851,0.009122,1,-59.0,0.25157,924.75,1,bed,M,d1p01M,9
399,253.25,-0.00938,0.95081,-0.116310,1,-58.5,0.35282,924.25,1,bed,M,d1p01M,9


#### Function computing descriptive statistics for a single participant

In [18]:
def count_datapoints(part):
    """Summarise the runs of consecutive identical activities in ``part``.

    A run is a maximal stretch of consecutive rows that share the same
    ``activity`` value. Rows are processed in their existing order; no
    sorting is done here, so pass the frame already ordered by time.

    Parameters
    ----------
    part : pandas.DataFrame
        Frame with at least an ``activity`` and a ``time`` column.
        # NOTE(review): intended for the rows of a single participant --
        # the function itself does not check this.

    Returns
    -------
    pandas.DataFrame
        ``describe()`` statistics of ``number_of_measurements`` (rows per
        run) and ``duration`` (last ``time`` minus first ``time`` of the run,
        rounded to 3 decimals), indexed by ``activity_type``.
    """
    # A new run starts wherever the activity differs from the previous row;
    # the cumulative sum of those change flags gives each run its own id.
    # Using the raw array (not the Series) groups rows by position, so the
    # result does not depend on index alignment or on the index being unique.
    run_ids = (part['activity'].shift() != part['activity']).cumsum().values

    # Group the frame that was passed in, not a notebook-level global.
    p_data = [
        [
            run['activity'].iloc[0],
            len(run),
            round(run['time'].iloc[-1] - run['time'].iloc[0], 3),
        ]
        for _, run in part.groupby(run_ids)
    ]

    p_df = pd.DataFrame(p_data, columns=['activity_type', 'number_of_measurements', 'duration'])

    return p_df.groupby('activity_type')[['number_of_measurements', 'duration']].describe()

In [19]:
# Run the summary on the single-participant slice.
p = count_datapoints(mini)

In [20]:
# Show the per-activity describe() table of run sizes and durations.
p

Unnamed: 0_level_0,number_of_measurements,number_of_measurements,number_of_measurements,number_of_measurements,number_of_measurements,number_of_measurements,number_of_measurements,number_of_measurements,duration,duration,duration,duration,duration,duration,duration,duration
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
activity_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
1,4.0,28.5,26.236107,4.0,7.0,27.5,49.0,55.0,4.0,17.27,17.656347,0.83,3.2075,15.625,29.6875,37.0
2,1.0,60.0,,60.0,60.0,60.0,60.0,60.0,1.0,77.0,,77.0,77.0,77.0,77.0,77.0
3,2.0,110.0,9.899495,103.0,106.5,110.0,113.5,117.0,2.0,41.875,7.601398,36.5,39.1875,41.875,44.5625,47.25
4,2.0,3.5,0.707107,3.0,3.25,3.5,3.75,4.0,2.0,5.625,2.65165,3.75,4.6875,5.625,6.5625,7.5
