In [13]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import os
import sys
import dateutil.parser

from __future__ import print_function

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [36]:
# Put the parent of the current working directory at the front of the module search path,
# so the import below is looked up there first (presumably the repository root that
# contains the UKMovementSensing package - TODO confirm).
sys.path.insert(0, os.path.abspath('..'))

from UKMovementSensing import dataprep

## Filepaths

In [42]:
# Base directory for all inputs and outputs of this notebook (note the trailing slash).
output_path = '/media/sf_VBox_Shared/London/raw/first5/'
# CSV inputs that sit directly inside the base directory.
wearcodes_path = os.path.join(output_path, 'wearcodes.csv')
annotations_path = os.path.join(output_path, 'tud_first5_deb.csv')

In [43]:
# Sub-directory of the base directory that is passed to dataprep.process_data as the data location.
file_path = os.path.join(output_path, 'accelerometer_5second/')

## Run the preprocessing script

This part runs all preprocessing steps. The same can be done by calling the script `modelgen.py` directly from the command line:

`python modelgen.py file_path annotations_path wearcodes_path output_path`

In [44]:
# Full preprocessing pipeline. Every step consumes the result of the previous one, so the
# order of these calls matters. What each dataprep helper does internally is not visible
# from this notebook; the notes below only describe the data flow.

# Built from the annotations CSV path.
annotations = dataprep.process_annotations(annotations_path)
# Combines the processed annotations with the wearcodes CSV.
annotations_codes = dataprep.join_wearcodes(wearcodes_path, annotations)
# Further down, dfs is indexed with (binfile, day) keys and each value is used as a DataFrame.
dfs = dataprep.process_data(annotations_codes, file_path)
# Presumably writes the frames below <output_path>/merged/ - TODO confirm.
dataprep.save_merged(dfs, os.path.join(output_path, 'merged/'))
# Further down, subsets is used as a mapping whose values have 'act' and 'angle*' columns.
subsets = dataprep.take_subsequences(dfs)
# Presumably writes the subsequences below <output_path>/subsets/ - TODO confirm.
dataprep.save_subsequences(subsets, os.path.join(output_path, 'subsets/'))

Number of missing start and end times: 0 0
starttime of data does not correspond with starttime of annotations!
2015-03-11 04:00:00+00:00 2015-03-15 04:00:00+00:00
starttime of data does not correspond with starttime of annotations!
2015-03-05 04:00:00+00:00 2015-03-08 04:00:00+00:00
starttime of data does not correspond with starttime of annotations!
2015-03-15 04:00:00+00:00 2015-03-16 04:00:00+00:00
starttime of data does not correspond with starttime of annotations!
2015-03-01 04:00:00+00:00 2015-03-03 04:00:00+00:00
starttime of data does not correspond with starttime of annotations!
2015-03-06 04:00:00+00:00 2015-03-07 04:00:00+00:00


## Investigate annotations

In [None]:
# Print the shape of the processed annotations, then show the first rows as the cell output.
print(annotations.shape)
annotations.head()

In [None]:
# To check: Do we have any gaps?
# For every pair of consecutive rows that share both 'serflag' and 'tud_day', the end_time
# of the earlier row should equal the start_time of the later row; mismatches are printed.
# Rows are addressed by position (.iloc) because the loop counter is a position, not an
# index label - label-based lookup would fail or compare the wrong rows as soon as the
# index of `annotations` is not the default 0..n-1 range.
for i in range(1, annotations.shape[0]):
    same_serflag = annotations['serflag'].iloc[i] == annotations['serflag'].iloc[i-1]
    same_day = annotations['tud_day'].iloc[i] == annotations['tud_day'].iloc[i-1]
    if same_serflag and same_day:
        previous_end = annotations['end_time'].iloc[i-1]
        current_start = annotations['start_time'].iloc[i]
        if previous_end != current_start:
            # Show the two offending rows ...
            print(annotations.iloc[[i-1, i]][['start_time', 'end_time']])
            # ... and the mismatch, computed as (previous end - current start).
            print(previous_end - current_start)

## Investigate join with wearcodes

In [None]:
# Print the shape of the annotations after the wearcodes join, then show the first rows
# as the cell output.
print(annotations_codes.shape)
annotations_codes.head()

## Investigate accelerometer data

In [None]:
# Pick one entry of dfs to look at.
# dict.keys() returns a non-subscriptable view on Python 3, so it is turned into a list
# before indexing; on Python 2 this selects the same key as keys()[0] did.
example_key = list(dfs.keys())[0]
# Each key unpacks into two parts.
binfile, day = example_key
df = dfs[example_key]
df.head()

In [None]:
# Take the first index entry of the example frame and print its `tz` attribute
# (presumably the timezone of a timestamp index; None would mean timezone-naive - TODO confirm).
t = df.index[0]
print(t.tz)

## Investigate subsequences

In [None]:
#Create boxplots for each sequence for the angles
#We expect x (and thus anglex) to be distributed either mostly on the negative or mostly on the positive half
# squeeze=False makes plt.subplots return a 2-D array of Axes even for a single row;
# without it a lone subset would yield a bare Axes object and axes[i] would fail.
fig, axes = plt.subplots(len(subsets), figsize=(10, 50), squeeze=False)
axes = axes.ravel()  # one column only, so this is simply one Axes per subset
for i, dataset in enumerate(subsets.values()):
    # Drop the rows whose 'act' value equals 1.0 (treated as "sleeping" by the naming here).
    non_sleeping_indices = dataset['act'] != 1.0
    non_sleeping = dataset[non_sleeping_indices]
    # Medians of the x and y angles, printed once per subset.
    print(np.median(non_sleeping['anglex']), np.median(non_sleeping['angley']))
    axes[i].boxplot([non_sleeping['anglex'], non_sleeping['angley'], non_sleeping['anglez']], labels=['x', 'y', 'z']);

In [None]:
# NOTE(review): this rebinds `subsets` to the result of switch_positions, so re-running the
# cell applies the function to its own earlier output - confirm switch_positions is
# idempotent, or re-run the preprocessing cell first.
subsets = dataprep.switch_positions(subsets)

In [None]:
#Create boxplots for each sequence for the angles, now on the output of switch_positions
#We expect x (and thus anglex) to be distributed either mostly on the negative or mostly on the positive half
# squeeze=False makes plt.subplots return a 2-D array of Axes even for a single row;
# without it a lone subset would yield a bare Axes object and axes[i] would fail.
fig, axes = plt.subplots(len(subsets), figsize=(10, 50), squeeze=False)
axes = axes.ravel()  # one column only, so this is simply one Axes per subset
for i, dataset in enumerate(subsets.values()):
    # Drop the rows whose 'act' value equals 1.0 (treated as "sleeping" by the naming here).
    non_sleeping_indices = dataset['act'] != 1.0
    non_sleeping = dataset[non_sleeping_indices]
    # Medians of the x and y angles, printed once per subset.
    print(np.median(non_sleeping['anglex']), np.median(non_sleeping['angley']))
    axes[i].boxplot([non_sleeping['anglex'], non_sleeping['angley'], non_sleeping['anglez']], labels=['x', 'y', 'z']);