# Apple Health Analysis General

## Setup

### Import sub-modules

In [1]:
# python
import sys
import os
import datetime
import enum

# iPython
import IPython
from IPython.display import display
from IPython.display import Image

# pandas
import pandas as pd

# numpy
import numpy as np

# plotly
import plotly as ply
import plotly.figure_factory as ff
ply.offline.init_notebook_mode(connected=True)
import plotly.io as pio

# watermark
import watermark
%load_ext watermark
%watermark -a "Silvan Zahno" -d -v -iv -m -h

pandas    0.24.2
IPython   7.4.0
plotly    3.8.1
numpy     1.16.2
watermark 1.8.1
Silvan Zahno 2019-07-02 

CPython 3.7.3
IPython 7.4.0

compiler   : MSC v.1915 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 142 Stepping 10, GenuineIntel
CPU cores  : 8
interpreter: 64bit
host name  : WE6996


### Configuration

In [2]:
# Local directory the input CSV files are read from; created on first run
inputDir = "in/"

if not os.path.exists(inputDir):
    os.makedirs(inputDir)
# The path may already exist as something other than a directory
if not os.path.isdir(inputDir):
    raise NotADirectoryError("{} is not a directory".format(inputDir))

In [3]:
# Local directory for generated output files; created on first run
outputDir = "out/"

if not os.path.exists(outputDir):
    os.makedirs(outputDir)
# The path may already exist as something other than a directory
if not os.path.isdir(outputDir):
    raise NotADirectoryError("{} is not a directory".format(outputDir))

In [6]:
# Graph output options
class GraphOutputOption(enum.Enum):
    """Where generated plots are sent."""
    # Do not generate any plots
    none = 'none'
    # Generate inline plots only
    inline = 'inline'
    # Generate plots in external files (html or png or ...)
    htmlFile = 'extFile'
    # Generate all plots inline and external
    both = 'both'


class GraphInteractionOption(enum.Enum):
    """How inline plots are rendered."""
    # Generate static inline plots (as images)
    static = 'static'
    # Generate interactive inline plots
    interactive = 'interactive'


# Notebook-wide graphing defaults
notebookGraphingInteraction = GraphInteractionOption.interactive
notebookGraphingOutputs = GraphOutputOption.both

# NOTE(review): presumably the extension appended to exported graph filenames;
# it is not referenced in the cells visible here - confirm against later cells
ext_file = ".svg"

# Dimensions used when a figure is rendered inline as a static image
staticImageSize = dict(width=1000, height=500, scale=1)

# Auto open external HTML files [True/False]
GraphAutoOpenHTML = False


class PrintOutputOption(enum.Enum):
    """Where text output is sent."""
    # Do not output either to file or console
    none = 'none'
    # Send to console
    console = 'console'
    # Send to file
    file = 'file'
    # Send to console and file
    both = 'both'


In [7]:
# Pandas display limits applied to every frame shown in this notebook
for option_name, option_value in (
    ('display.max_rows', 50),
    ('display.max_columns', 50),
    ('display.width', 100),
):
    pd.set_option(option_name, option_value)

### Common functions

In [8]:
# Plot figures
def plot_figure(graphFilename, fig, overrideNotebookGraphingInteraction=None, height=350):
    """Creates the graph plots depending on the chosen notebook options

    Whether a file is written and/or an inline plot is shown is controlled by
    the module-level ``notebookGraphingOutputs`` setting.

    Args:
        graphFilename: Filename of the external output file. The extension
            selects the export: '.html' is written with ``ply.offline.plot``,
            image extensions (.png, .jpg, .jpeg, .webp, .svg, .pdf, .eps) are
            written with ``pio.write_image``. Any other extension is skipped
            with a warning.
        fig: ply.graph_objs.Figure(data=traces, layout=mylayout)
        overrideNotebookGraphingInteraction: GraphInteractionOption that
            overrides ``notebookGraphingInteraction`` for this figure. None
            keeps the notebook setting; any value other than the interactive
            option results in a static image.
        height: height of the image file export (the inline static image is
            sized from ``staticImageSize`` instead)
    Returns:
        None
    Raises:
        Nothing explicitly; errors from the plotly calls propagate.

    """
    import warnings

    image_extensions = ('.png', '.jpg', '.jpeg', '.webp', '.svg', '.pdf', '.eps')

    write_file = notebookGraphingOutputs in (GraphOutputOption.htmlFile, GraphOutputOption.both)
    show_inline = notebookGraphingOutputs in (GraphOutputOption.inline, GraphOutputOption.both)

    if write_file:
        # Split once instead of once per comparison
        extension = os.path.splitext(graphFilename)[1]
        if extension in image_extensions:
            pio.write_image(fig, graphFilename, height=height)
        elif extension == '.html':
            ply.offline.plot(fig, filename=graphFilename, auto_open=GraphAutoOpenHTML)
        else:
            # Previously this case was dropped silently, leaving no file and no hint why
            warnings.warn(
                "No file written for {!r}: unsupported extension {!r}".format(graphFilename, extension),
                stacklevel=2,
            )

    if show_inline:
        if overrideNotebookGraphingInteraction is None:
            # No override: only an explicit 'static' notebook setting yields an image
            render_static = notebookGraphingInteraction == GraphInteractionOption.static
        else:
            # With an override, everything except 'interactive' falls back to static
            render_static = overrideNotebookGraphingInteraction != GraphInteractionOption.interactive

        if render_static:
            plot_img = pio.to_image(
                fig,
                format='png',
                width=staticImageSize['width'],
                height=staticImageSize['height'],
                scale=staticImageSize['scale'],
            )
            display(Image(plot_img))
        else:
            ply.offline.iplot(fig)
            

In [10]:
def print_file(text="", file=None, outputOption=PrintOutputOption('console'), append=True):
    """Custom print function to print to console and/or file

    Args:
        text: String to be used
        file: output file path; required when outputOption is 'file' or 'both'
        outputOption: PrintOutputOption to specify where to send the string
        append: For file output only, True appends to an existing file,
            False creates a new file / overwrites the existing one
    Returns:
        None
    Raises:
        TypeError: file output was requested but no file path was given

    """
    to_file = outputOption in (PrintOutputOption.file, PrintOutputOption.both)
    to_console = outputOption in (PrintOutputOption.console, PrintOutputOption.both)

    if to_file:
        if file is None:
            # open(None) would raise TypeError anyway; keep the type, improve the message
            raise TypeError("print_file: 'file' must be a path when outputOption is 'file' or 'both'")
        # A newline is always appended, matching print() on the console side
        with open(file, "a" if append else "w") as handle:
            handle.write(text + "\n")
    if to_console:
        print(text)

## Data import

In [96]:
# Single source of truth for the imported measurements, one row per CSV file:
# (short key, human-readable description, file name inside inputDir).
# The row order defines the position of each frame in ``dfs``.
measurement_files = [
    ('energy', "Active Energy Burned", "ActiveEnergyBurned.csv"),
    ('alcohol', "Blood Alcohol Content", "BloodAlcoholContent.csv"),
    ('fat', "Body Fat Percentage", "BodyFatPercentage.csv"),
    ('weight', "Body Mass", "BodyMass.csv"),
    ('bmi', "Body Mass Index", "BodyMassIndex.csv"),
    ('caffeine', "Dietary Caffeine", "DietaryCaffeine.csv"),
    ('water', "Dietary Water", "DietaryWater.csv"),
    ('cycling', "Distance Cycling", "DistanceCycling.csv"),
    ('walking', "Distance Walking & Running", "DistanceWalkingRunning.csv"),
    ('flightsclimbed', "Number of Flights Climbed", "FlightsClimbed.csv"),
    ('heartrate', "Heart Rate", "HeartRate.csv"),
    ('height', "Height", "Height.csv"),
    ('sleep', "Sleep Analysis", "SleepAnalysis.csv"),
    ('steps', "Step Count", "StepCount.csv"),
    ('workout', "Workout", "Workout.csv"),
]

# key -> index into ``dfs``
df_list = {key: index for index, (key, _, _) in enumerate(measurement_files)}
# key -> display name used in the printed reports
df_description = {key: description for key, description, _ in measurement_files}
# One DataFrame per measurement, in table order
dfs = [pd.read_csv(inputDir + filename) for _, _, filename in measurement_files]

## Analysis

### Columns

In [100]:
# List every column and its dtype, one section per measurement
for measurement, frame_index in df_list.items():
    frame = dfs[frame_index]
    print("\n{}".format(df_description[measurement]))
    for col in frame.columns:
        print("  * {} - {}".format(col, frame[col].dtypes))


Active Energy Burned
  * sourceName - object
  * sourceVersion - object
  * device - float64
  * type - object
  * unit - object
  * creationDate - object
  * startDate - object
  * endDate - object
  * value - int64

Blood Alcohol Content
  * sourceName - object
  * sourceVersion - int64
  * device - float64
  * type - object
  * unit - object
  * creationDate - object
  * startDate - object
  * endDate - object
  * value - float64

Body Fat Percentage
  * sourceName - object
  * sourceVersion - float64
  * device - float64
  * type - object
  * unit - object
  * creationDate - object
  * startDate - object
  * endDate - object
  * value - float64

Body Mass
  * sourceName - object
  * sourceVersion - float64
  * device - float64
  * type - object
  * unit - object
  * creationDate - object
  * startDate - object
  * endDate - object
  * value - float64

Body Mass Index
  * sourceName - object
  * sourceVersion - int64
  * device - float64
  * type - object
  * unit - object
  * crea

### Describe

In [101]:
dfs[df_list['energy']].describe()

Unnamed: 0,device,value
count,0.0,110.0
mean,,1333.409091
std,,1046.909951
min,,0.0
25%,,644.25
50%,,1100.0
75%,,1625.25
max,,7504.0


In [62]:
dfs[df_list['alcohol']].describe()

Unnamed: 0,sourceVersion,device,value
count,4.0,0.0,4.0
mean,754.0,,0.25
std,0.0,,0.0
min,754.0,,0.25
25%,754.0,,0.25
50%,754.0,,0.25
75%,754.0,,0.25
max,754.0,,0.25


In [63]:
dfs[df_list['fat']].describe()

Unnamed: 0,sourceVersion,device,value
count,0.0,0.0,2.0
mean,,,5.4275
std,,,7.527152
min,,,0.105
25%,,,2.76625
50%,,,5.4275
75%,,,8.08875
max,,,10.75


In [64]:
dfs[df_list['weight']].describe()

Unnamed: 0,sourceVersion,device,value
count,20.0,0.0,21.0
mean,754.0,,71.604762
std,0.0,,0.643798
min,754.0,,70.0
25%,754.0,,71.9
50%,754.0,,71.9
75%,754.0,,71.9
max,754.0,,71.9


In [65]:
dfs[df_list['bmi']].describe()

Unnamed: 0,sourceVersion,device,value
count,20.0,0.0,20.0
mean,754.0,,17.7825
std,0.0,,7.666371
min,754.0,,0.0
25%,754.0,,20.584375
50%,754.0,,21.008
75%,754.0,,21.008
max,754.0,,21.01


In [66]:
dfs[df_list['caffeine']].describe()

Unnamed: 0,sourceVersion,device,value
count,4.0,0.0,4.0
mean,754.0,,5.24
std,0.0,,3.995264
min,754.0,,1.78
25%,754.0,,1.78
50%,754.0,,5.24
75%,754.0,,8.7
max,754.0,,8.7


In [77]:
dfs[df_list['water']].describe()

Unnamed: 0,sourceVersion,device,value
count,18.0,0.0,18.0
mean,754.0,,0.344444
std,0.0,,0.145409
min,754.0,,0.2
25%,754.0,,0.3
50%,754.0,,0.3
75%,754.0,,0.3
max,754.0,,0.8


In [68]:
dfs[df_list['cycling']].describe()

Unnamed: 0,device,value
count,0.0,17.0
mean,,20.662688
std,,9.365187
min,,11.4823
25%,,14.4548
50%,,19.2007
75%,,24.6743
max,,43.0062


In [69]:
dfs[df_list['walking']].describe()

Unnamed: 0,value
count,185915.0
mean,0.046744
std,0.416731
min,0.0
25%,0.003654
50%,0.00813
75%,0.022754
max,98.3011


In [70]:
dfs[df_list['flightsclimbed']].describe()

Unnamed: 0,value
count,52615.0
mean,1.167937
std,2.720212
min,1.0
25%,1.0
50%,1.0
75%,1.0
max,392.0


In [71]:
dfs[df_list['heartrate']].describe()

Unnamed: 0,sourceVersion,device,value
count,56.0,0.0,56.0
mean,5281.964286,,59.785714
std,552.737254,,9.134777
min,5070.0,,47.0
25%,5151.25,,54.75
50%,5196.0,,59.0
75%,5224.75,,62.25
max,8112.0,,113.0


In [72]:
dfs[df_list['height']].describe()

Unnamed: 0,sourceVersion,device,value
count,0.0,0.0,1.0
mean,,,185.0
std,,,
min,,,185.0
25%,,,185.0
50%,,,185.0
75%,,,185.0
max,,,185.0


In [76]:
dfs[df_list['sleep']].describe()

Unnamed: 0,sourceVersion,unit
count,361.0,0.0
mean,4761.980609,
std,1288.929637,
min,50.0,
25%,4906.0,
50%,4925.0,
75%,5193.0,
max,8112.0,


In [74]:
dfs[df_list['steps']].describe()

Unnamed: 0,value
count,179535.0
mean,60.222759
std,232.776342
min,1.0
25%,5.0
50%,12.0
75%,35.0
max,42380.0


In [75]:
dfs[df_list['workout']].describe()

Unnamed: 0,device,duration,totalDistance,totalEnergyBurned
count,0.0,110.0,110.0,110.0
mean,,339.268291,15.067002,1333.409091
std,,1235.356934,13.431397,1046.909951
min,,0.035976,0.0,0.0
25%,,104.025,6.848906,644.25
50%,,194.789262,11.406813,1100.0
75%,,305.576855,18.883543,1625.25
max,,13028.883333,98.301125,7504.0


## Statistics

In [120]:
# Print a short summary per measurement: time span, first source, record count
# and, except for 'sleep' and 'workout', the summed value with its unit
for measurement, frame_index in df_list.items():
    frame = dfs[frame_index]
    print("\n{}".format(df_description[measurement]))
    print("  * Time between {} - {}".format(frame.startDate.iat[0], frame.startDate.iat[-1]))
    print("  * Measured by {}".format(frame.sourceName.iat[0]))
    # 'workout' counts the rows of the frame; every other measurement counts its value column
    record_count = len(frame) if measurement == 'workout' else len(frame.value)
    print("  * {} measurements".format(record_count))
    if measurement not in ('sleep', 'workout'):
        print("  * Total {} {}".format(frame.value.sum(), frame.unit.iat[0]))


Active Energy Burned
  * Time between 2014-11-25 18:22:13 +0200 - 2019-07-02 14:42:10 +0200
  * Measured by Runtastic
  * 110 measurements
  * Total 146675 kcal

Blood Alcohol Content
  * Time between 2019-01-22 11:52:00 +0200 - 2019-01-23 14:06:00 +0200
  * Measured by Shortcuts
  * 4 measurements
  * Total 1.0 %

Body Fat Percentage
  * Time between 2014-11-01 20:55:00 +0200 - 2014-11-01 20:56:00 +0200
  * Measured by Health
  * 2 measurements
  * Total 10.855 %

Body Mass
  * Time between 2014-11-01 20:55:00 +0200 - 2019-01-23 09:27:00 +0200
  * Measured by Health
  * 21 measurements
  * Total 1503.7000000000003 kg

Body Mass Index
  * Time between 2019-01-21 19:21:00 +0200 - 2019-01-23 09:27:00 +0200
  * Measured by Shortcuts
  * 20 measurements
  * Total 355.65000000000003 count

Dietary Caffeine
  * Time between 2019-01-25 22:24:00 +0200 - 2019-01-26 14:10:00 +0200
  * Measured by Shortcuts
  * 4 measurements
  * Total 20.959999999999997 mg

Dietary Water
  * Time between 2019-0