In [51]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from scipy.interpolate import interp1d
import scipy.stats as st

In [2]:
from pathlib import Path

import pandas as pd

DATA_ROOT = Path('..') / 'data'

dfs = []
activity_labels = ['bed', 'chair', 'lying', 'ambulating']
default_names = ['time', 'front', 'vertical', 'lateral', 'sensor_id', 'rssi', 'phase', 'frequency', 'activity']
for data_file in Path(DATA_ROOT).rglob('d[12]p??[FM]'):
    df = pd.read_csv(data_file, names=default_names)
    df['activity_label'] = df['activity'].apply(lambda i: activity_labels[i - 1])
    df['gender_label'] = str(data_file)[-1]
    df['participant'] = data_file.name
    
    # Add a column indicating order of the activities for a particiapnt.
    df = df.sort_values(by=['time'])
    df['activity_sequence'] = (df['activity'].shift(1) != df['activity']).cumsum()
    dfs.append(df)

sensor_df = pd.concat(dfs, axis='index')
sensor_df = sensor_df.sort_values(by=['participant', 'time'])

sensor_df.head()

Unnamed: 0,time,front,vertical,lateral,sensor_id,rssi,phase,frequency,activity,activity_label,gender_label,participant,activity_sequence
0,0.0,0.27203,1.0082,-0.082102,1,-63.5,2.4252,924.25,1,bed,M,d1p01M,1
1,0.5,0.27203,1.0082,-0.082102,1,-63.0,4.7369,921.75,1,bed,M,d1p01M,1
2,1.5,0.44791,0.91636,-0.013684,1,-63.5,3.0311,923.75,1,bed,M,d1p01M,1
3,1.75,0.44791,0.91636,-0.013684,1,-63.0,2.0371,921.25,1,bed,M,d1p01M,1
4,2.5,0.34238,0.96229,-0.059296,1,-63.5,5.892,920.25,1,bed,M,d1p01M,1


In [3]:
def normalize_time_values(df):
    return pd.DataFrame(dict(t_norm=np.linspace(0, 100, len(df))))

sensor_df['t_norm'] = (sensor_df.groupby(['participant', 'activity_sequence'])
                       .apply(normalize_time_values)
                       .reset_index(drop=True)['t_norm'])
sensor_df.head()

Unnamed: 0,time,front,vertical,lateral,sensor_id,rssi,phase,frequency,activity,activity_label,gender_label,participant,activity_sequence,t_norm
0,0.0,0.27203,1.0082,-0.082102,1,-63.5,2.4252,924.25,1,bed,M,d1p01M,1,0.0
1,0.5,0.27203,1.0082,-0.082102,1,-63.0,4.7369,921.75,1,bed,M,d1p01M,1,1.851852
2,1.5,0.44791,0.91636,-0.013684,1,-63.5,3.0311,923.75,1,bed,M,d1p01M,1,3.703704
3,1.75,0.44791,0.91636,-0.013684,1,-63.0,2.0371,921.25,1,bed,M,d1p01M,1,5.555556
4,2.5,0.34238,0.96229,-0.059296,1,-63.5,5.892,920.25,1,bed,M,d1p01M,1,7.407407


In [4]:
def interpolate(v, t_norm):
    t = np.linspace(min(t_norm), max(t_norm), len(v))
    # We need at least 2 data points for interpolation
    if len(v) == 1:
        v = [v[0]] * 2
        t = [min(t_norm), max(t_norm)]
    try:
        std = np.std(v)
        mn = np.mean(v)
        z_score = (v - mn) / std if std else v - mn
        f = interp1d(t, v)
    except ValueError:
        raise
    v_norm = f(t_norm)
    return v_norm

def interpolate_sensor_data(df):
    data = {}
    n = 40
    for column in ['participant', 'activity', 'activity_label', 'activity_sequence']:
        data[column] = [df.iloc[0][column]] * n
    t_norm = np.linspace(0, 100, n)
    data['t_norm'] = t_norm
    for column in ['front', 'lateral', 'vertical']:
        data[column] = interpolate(df[column].values, t_norm)
    return pd.DataFrame(data)
    

In [143]:
norm_df = (sensor_df.groupby(['participant', 'activity_sequence'])
           .apply(interpolate_sensor_data)
           .reset_index(drop=True))

In [6]:
colors = px.colors.qualitative.D3
values = ['front', 'lateral', 'vertical']
SENSOR_COLORS = {
    value: colors[i] for i, value in enumerate(values)}

In [21]:
def jitter(size, mean=0., sd=1.):
    return np.random.normal(mean, sd, size=size)

In [144]:
activity_labels = sorted(norm_df['activity_label'].unique())

fig = make_subplots(
    rows=2, 
    cols=2, 
    vertical_spacing=0.1,
    horizontal_spacing=0.1,
    subplot_titles=[label.capitalize() for label in activity_labels])

first = True
mode = 'lines+markers'
for _, activity_df in norm_df.groupby(['participant', 'activity_sequence']):
    activity_label = activity_df['activity_label'].values[0]
    i = activity_labels.index(activity_label)
    
    row = i // 2 + 1
    col = i % 2 + 1
    for sensor in ['front', 'lateral', 'vertical']:
        fig.add_trace(
            go.Scattergl(
                x=activity_df['t_norm'] + jitter(len(activity_df)), 
                y=activity_df[sensor],
                marker_color=SENSOR_COLORS[sensor],
                opacity=0.0625,
                name=sensor,
                showlegend=False,
                legendgroup=sensor,
                mode=mode), row=row, col=col)
    first = False

# Just for legend :p
for sensor in ['front', 'lateral', 'vertical']:
    fig.add_trace(
        go.Scattergl(
            x=[None], 
            y=[None],
            marker_color=SENSOR_COLORS[sensor],
            name=sensor.capitalize(),
            showlegend=True,
            legendgroup=sensor,
            mode=mode))
    
fig.update_yaxes(range=[-2,2], showgrid=False, zeroline=True, showticklabels=False)
fig.update_xaxes(showgrid=False, showticklabels=False, zeroline=True)
fig.update_layout(
    width=800, 
    height=600, 
    margin=dict(l=16, r=16, t=64, b=16),
    legend_title='<b>Sensor</b>',
    title='<b>Interpolated sensor data for all participants</b>')
fig.show()


In [78]:
t_norms = sorted(norm_df['t_norm'].unique())
rows = []
for activity_label in norm_df['activity_label'].unique():
    activity_df = norm_df[norm_df['activity_label'] == activity_label]
    for t in t_norms:
        row = {'time': t, 'activity_label': activity_label}
        for sensor in ['front', 'lateral', 'vertical']:
            sample = activity_df[activity_df['t_norm'] == t][sensor]
            mid = np.mean(sample)
            high, low = st.t.interval(0.95, len(sample) - 1, loc=mid, scale=np.std(sample))
            row[f'{sensor}-mid'] = mid
            row[f'{sensor}-low'] = low
            row[f'{sensor}-high'] = high
        rows.append(row)
sample_df = pd.DataFrame(rows)
sample_df.head()

Unnamed: 0,time,activity_label,front-mid,front-low,front-high,lateral-mid,lateral-low,lateral-high,vertical-mid,vertical-low,vertical-high
0,0.0,bed,0.452491,1.149618,-0.244637,-0.10574,0.611552,-0.823032,0.772841,1.228377,0.317305
1,2.564103,bed,0.432235,1.105713,-0.241244,-0.10314,0.582532,-0.788812,0.791802,1.222903,0.360701
2,5.128205,bed,0.428609,1.067348,-0.21013,-0.103902,0.573032,-0.780835,0.802645,1.219449,0.385841
3,7.692308,bed,0.434119,1.065267,-0.19703,-0.10068,0.576543,-0.777903,0.803437,1.213258,0.393615
4,10.25641,bed,0.435582,1.062678,-0.191514,-0.102344,0.569353,-0.774041,0.806274,1.213178,0.399371


In [140]:
activity_labels = sorted(sample_df['activity_label'].unique())

subplot_titles = np.reshape(activity_labels, (2, 2))
    
fig = make_subplots(
    rows=2, 
    cols=2, 
    subplot_titles=[title.capitalize() for title in subplot_titles.flatten('C')],
    vertical_spacing=0.1,
    horizontal_spacing=0.1)

for row, col in [(1, 1), (1, 2), (2, 1), (2, 2)]:
    activity_label = subplot_titles[row - 1, col - 1]
    activity_df = sample_df[sample_df['activity_label'] == activity_label]
    for sensor in ['front', 'lateral', 'vertical']:
        fig.add_trace(
            go.Scatter(
                x=pd.concat((activity_df['time'], activity_df['time'][::-1])), 
                y=pd.concat((activity_df[f'{sensor}-low'], activity_df[f'{sensor}-high'][::-1])),
                fillcolor=SENSOR_COLORS[sensor],
                opacity=0.25,
                line_width=0,
                mode='lines',
                name=sensor,
                fill='toself',
                showlegend=False,
                legendgroup=sensor),
            row=row,
            col=col)
        
        fig.add_trace(
            go.Scatter(
                x=activity_df['time'], 
                y=activity_df[f'{sensor}-mid'],
                line_color=SENSOR_COLORS[sensor],
                mode='lines',
                name=sensor.capitalize(),
                showlegend=row == 1 and col == 1,
                legendgroup=sensor),
            row=row,
            col=col)
fig.update_yaxes(range=[-2,2], showgrid=False, zeroline=True, showticklabels=False)
fig.update_xaxes(range=[-5, 105], showgrid=False, showticklabels=False, zeroline=True)
fig.update_layout(
    width=800, 
    height=600, 
    margin=dict(l=16, r=16, t=64, b=16),
    title='<b>Interpolated sensor data with 95% CIs',
    legend_title='<b>Sensor</b>')
fig.show()