In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import os
import sys
import dateutil.parser

from __future__ import print_function

In [None]:
# Resolve the directory two levels above the current working directory and put
# it at the front of the module search path, so the import below is looked up
# there first.
# NOTE(review): presumably this is where the UKMovementSensing package lives — confirm.
two_levels_up = os.path.abspath('../..')
sys.path.insert(0, two_levels_up)

from UKMovementSensing import dataprep

In [None]:
# Execute config.py in the notebook namespace so that the names it defines are
# available to the cells below.
# NOTE(review): presumably this provides annotations_path, wearcodes_path,
# accelerometer_5sec_path, merged_path and subset_path — confirm against config.py.
#
# `execfile` only exists on Python 2. Reading, compiling and exec-ing the file
# does the same thing on both Python 2 and Python 3; compiling with the file
# name keeps tracebacks pointing at config.py.
with open('config.py') as config_file:
    exec(compile(config_file.read(), 'config.py', 'exec'))

## Run the preprocessing script

This part runs all the preprocessing steps. The same can be done by calling the script `modelgen.py` directly from the command line:

`python modelgen.py file_path annotations_path wearcodes_path output_path`

In [None]:
# --- Annotations -----------------------------------------------------------
# Process the annotations file, then join the wear codes onto the result.
annotations = dataprep.process_annotations(annotations_path)
annotations_codes = dataprep.join_wearcodes(wearcodes_path, annotations)

# --- Accelerometer data ----------------------------------------------------
# Combine the joined annotations with the accelerometer data and save the
# merged result.
dfs = dataprep.process_data(annotations_codes, accelerometer_5sec_path)
dataprep.save_merged(dfs, merged_path)

# --- Subsequences ----------------------------------------------------------
# Take subsequences of the merged data, pass them through `switch_positions`,
# and save the outcome.
subsets = dataprep.switch_positions(dataprep.take_subsequences(dfs))
dataprep.save_subsequences(subsets, subset_path)

## Investigate annotations

In [None]:
# Quick look at the processed annotations: print the shape, then display the
# first rows as the cell output.
print(annotations.shape)
annotations.head()

In [None]:
# To check: do we have any gaps between consecutive annotations?
#
# Each row is compared with the row directly before it. A pair is only
# considered when both rows share the same `serflag` and the same `tud_day`;
# it is reported when the previous row's `end_time` differs from the current
# row's `start_time`. For every such pair the two rows and the difference
# (previous end minus current start) are printed.
#
# The comparison is vectorised with `shift` and the reporting uses positional
# indexing (`iloc`), so the check does not depend on `annotations` having a
# default 0..n-1 integer index. The first row has no predecessor: its shifted
# values are missing, so it never matches.
prev_serflag = annotations['serflag'].shift(1)
prev_tud_day = annotations['tud_day'].shift(1)
prev_end_time = annotations['end_time'].shift(1)

same_group = (annotations['serflag'] == prev_serflag) & (annotations['tud_day'] == prev_tud_day)
has_gap = same_group & (prev_end_time != annotations['start_time'])

for pos in np.flatnonzero(has_gap.values):
    print(annotations.iloc[[pos - 1, pos]][['start_time', 'end_time']])
    print(annotations['end_time'].iloc[pos - 1] - annotations['start_time'].iloc[pos])

## Investigate join with wearcodes

In [None]:
# Inspect the result of joining the wear codes onto the annotations: print its
# shape, then display the first rows as the cell output.
print(annotations_codes.shape)
annotations_codes.head()

## Investigate accelerometer data

In [None]:
# Pick one entry of `dfs` as an example to inspect.
# On Python 3 `dict.keys()` returns a view that cannot be indexed, so it is
# turned into a list first; on Python 2 this selects the same key as
# `dfs.keys()[0]` did.
example_key = list(dfs.keys())[0]
# The key unpacks into two parts, named here after the source file and the day.
binfile, day = example_key
df = dfs[example_key]
df.head()

In [None]:
# Check the timezone of the example data: take the first entry of the index
# and print its `tz` attribute.
# NOTE(review): presumably the index holds pandas Timestamps, in which case
# `tz` is None for timezone-naive data — confirm.
t = df.index[0]
print(t.tz)

## Investigate subsequences

In [None]:
# Create boxplots of the angles for each subsequence, one subplot per sequence.
# We expect x (and thus anglex) to be distributed either mostly on the negative
# or mostly on the positive half.
#
# `squeeze=False` makes `plt.subplots` always return a 2-D array of axes. With
# the default, a single subsequence yields a bare Axes object that cannot be
# indexed, so the loop would fail in that case.
fig, axes = plt.subplots(len(subsets), figsize=(10, 50), squeeze=False)
for ax, dataset in zip(axes[:, 0], subsets.values()):
    # Keep only rows whose `act` value is not 1.0.
    # NOTE(review): presumably act == 1.0 codes sleeping, going by the original
    # variable names — confirm against the activity coding.
    non_sleeping = dataset[dataset['act'] != 1.0]
    # Medians of the x and y angles, as a quick numeric check next to the plot.
    print(np.median(non_sleeping['anglex']), np.median(non_sleeping['angley']))
    ax.boxplot([non_sleeping['anglex'], non_sleeping['angley'], non_sleeping['anglez']], labels=['x', 'y', 'z']);