In [433]:
import pandas as pd
import numpy as np
from dateutil import parser
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
# Initialise plotly's notebook mode so figures render inline
# (presumably connected=True loads plotly.js from a CDN instead of embedding it -- confirm against the plotly docs)
init_notebook_mode(connected=True)
# Switch cufflinks to offline plotting for the DataFrame.iplot() calls used throughout this notebook
cf.go_offline()

In [613]:
# Resting heart rate exported from the Apple Watch, indexed by the 'Date' column
resting_hr = pd.read_csv(
    'HKQuantityTypeIdentifierRestingHeartRate.csv',
    index_col='Date',
    parse_dates=True,
)

In [614]:
# read_csv(index_col='Date', parse_dates=True) already turned 'Date' into the
# parsed index, so there is no 'Date' column left to convert. Looking up
# resting_hr['Date'] raised a KeyError on a fresh run (and unit='s' only
# applies to numeric epoch values, not to timestamp strings).
resting_hr.head()

Unnamed: 0_level_0,Value,Unit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-10-13 22:40:41-07:00,65.0,count/min
2020-10-14 21:19:02-07:00,63.0,count/min
2020-10-15 18:58:04-07:00,64.0,count/min
2020-10-16 22:41:50-07:00,55.0,count/min
2020-10-17 21:10:58-07:00,55.0,count/min


In [615]:
# Average of every raw resting-HR sample (a Series reduces along its only axis by default)
hr_mean = resting_hr['Value'].mean()

In [616]:
# Interactive line plot of the raw resting-HR samples
resting_hr.iplot(
    y='Value',
    yTitle='Resting HR',
)

In [617]:
# There should only be 324 entries (one per day), so the export contains duplicates.
# Resample to daily bins and sum: a day with duplicate readings adds up to an
# implausibly high value, and a day with no reading comes out as 0.
# Only the numeric 'Value' column is resampled -- the 'Unit' strings cannot be
# meaningfully summed and newer pandas no longer drops them silently.
resting_hr = resting_hr[['Value']].resample('D').sum()

# 71 is the highest value, so any values above that will be duplicated for that particular day.
# All of these are duplicate entries, so replace them with the average resting HR.
resting_hr['Value'] = resting_hr['Value'].mask(resting_hr['Value'] > 71.0, hr_mean)

# Days with no reading were summed to 0; fill those with the average as well.
# The assignment is scoped to the 'Value' column so that it can never overwrite
# other columns of the matching rows.
resting_hr.loc[resting_hr['Value'] == 0.0, 'Value'] = hr_mean


In [621]:
# Weather https://www.ncdc.noaa.gov/cdo-web/
# Read the export indexed by its 'DATE' column and keep only the TMAX column
weather = pd.read_csv('2700509.csv', index_col='DATE', parse_dates=True)[['TMAX']]

In [623]:
# Interactive line plot of the TMAX column
weather.iplot(
    y='TMAX',
    yTitle='Maximum temperature (\u00B0F)',
)

In [630]:
# Walking/running distance samples, indexed by the 'Date' column
distance = pd.read_csv(
    'HKQuantityTypeIdentifierDistanceWalkingRunning.csv',
    index_col='Date',
    parse_dates=True,
)

In [632]:


# Because the Apple Watch records small intervals of activity over the course of a day,
# resample to daily bins and sum to get the total distance walked/run on each day.
# Only the numeric 'Value' column is summed; the 'Unit' column holds strings,
# which newer pandas no longer drops silently from a sum.
dist2 = distance[['Value']].resample('D').sum()
dist2.iplot(yTitle='Distance walk+run (mi)')
# There's something wrong with this plot. The longest distance I've run is a half marathon, but according to this graph, I've walked/run more than one.
# Let's see what's going on with the data.

In [636]:
# Let's look at January 3, 2021
day_start, day_end = '2021-01-03', '2021-01-04'
mask = (distance.index >= day_start) & (distance.index < day_end)
df = distance[mask]

# Show only that day's samples that are longer than one mile
df.loc[df['Value'] > 1.0]

# It looks like when you record an activity, the Apple Watch dumps it all at once. There's clearly some GPS weirdness going on, so let's filter out any distances < 1.0 mi
#distance[distance['Value'] >= 1.0].iplot()

Unnamed: 0_level_0,Value,Unit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-03 18:04:34-07:00,7.0256,mi


In [637]:
# Plot only the samples that are longer than one mile
distance.loc[distance['Value'] > 1.0].iplot(
    y='Value',
    yTitle='Distance walk+run (mi)',
)
# The numbers still aren't adding up. Luckily, I have consistently tracked my workouts with Strava, so I'll use that data instead.

In [638]:
# Strava activity export, indexed by activity date.
# Some activities were manually added to this spreadsheet.
strava = pd.read_csv(
    'activities.csv',
    index_col='Activity Date',
    parse_dates=True,
)

In [639]:
# Preview the first rows of the Strava export as loaded
strava.head()

Unnamed: 0_level_0,Activity ID,Activity Name,Activity Type,Activity Description,Elapsed Time,Distance,Relative Effort,Commute,Activity Gear,Filename,...,Precipitation Type,Cloud Cover,Weather Visibility,UV Index,Weather Ozone,"<span class=""translation_missing"" title=""translation missing: en-US.lib.export.portability_exporter.activities.horton_values.jump_count"">Jump Count</span>","<span class=""translation_missing"" title=""translation missing: en-US.lib.export.portability_exporter.activities.horton_values.total_grit"">Total Grit</span>","<span class=""translation_missing"" title=""translation missing: en-US.lib.export.portability_exporter.activities.horton_values.avg_flow"">Avg Flow</span>","<span class=""translation_missing"" title=""translation missing: en-US.lib.export.portability_exporter.activities.horton_values.flagged"">Flagged</span>","<span class=""translation_missing"" title=""translation missing: en-US.lib.export.portability_exporter.activities.horton_values.avg_elapsed_speed"">Avg Elapsed Speed</span>"
Activity Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-03-21 23:20:20,909484000.0,Afternoon Ride,Ride,,756.0,3.97,,False,,activities/909484019.gpx,...,,,,,,,,,,
2017-04-08 22:31:55,933794100.0,Afternoon Ride,Ride,,2277.0,13.02,,False,,activities/933794093.gpx,...,,,,,,,,,,
2017-04-15 20:33:59,944015500.0,Afternoon Ride,Ride,,8134.0,49.33,,False,,activities/944015496.gpx,...,,,,,,,,,,
2017-10-10 19:47:51,1226077000.0,Lunch Ride,Ride,,1740.0,0.63,,False,,activities/1226076972.gpx,...,,,,,,,,,,
2017-10-11 19:32:55,1226078000.0,Yosemite,Ride,,15329.0,12.53,,False,,activities/1226077715.gpx,...,,,,,,,,,,


In [640]:
# Keep only a few statistics that might relate to metrics the Apple Watch tracks,
# like sleep time or resting heart rate.
strava_columns = [
    'Activity Type',
    'Elapsed Time',
    'Distance',
    'Elevation Gain',
    'Average Grade',
    'Average Speed',
]
strava = strava[strava_columns]

In [641]:
# Drop everything recorded before the Apple Watch data begins (October 13, 2020)
after_watch_start = strava.index > '2020-10-13'
strava = strava[after_watch_start]

In [642]:
# Strava's data is in SI units. Convert to Imperial units.
# assign() builds a new frame, so nothing is written into a filtered slice of
# the original export (avoids pandas' SettingWithCopyWarning).
# NOTE: this cell is not idempotent -- re-running it converts the values again.
strava = strava.assign(**{
    'Distance': strava['Distance'] / 1.60934,              # km to mi
    'Elevation Gain': strava['Elevation Gain'] * 3.28084,  # m to ft
    # km/h to mi/h: 1 km is 0.621371 mi, so multiply. (Dividing by this factor,
    # as the cell did before, converts mi/h to km/h instead.)
    # NOTE(review): confirm the export really reports speed in km/h.
    'Average Speed': strava['Average Speed'] * 0.621371,
})

In [677]:
# Preview the first rows after column selection, date filtering and unit conversion
strava.head()

Unnamed: 0_level_0,Activity Type,Elapsed Time,Distance,Elevation Gain,Average Grade,Average Speed
Activity Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-10-16,Ride,,7.14,,,
2020-10-19,Run,,3.06,,,
2020-10-21,Run,,3.26,,,
2020-10-23,Ride,,7.22,,,
2020-10-29,Ride,,8.25,,,


In [666]:
# Sleep
# Apple Watch tends to greatly overestimate the amount of sleep I get. If I'm reading in bed, it usually counts that as sleep.
# I use an app called AutoSleep instead--it's fairly accurate. I've only been using it since December 2020, so I'll have to make an educated guess to fill in October 2020-November 2020
sleep = pd.read_csv('AutoSleep.csv', parse_dates=True, index_col='toDate')

# Keep only the 'asleep' column. .copy() makes this an independent frame, so the
# later in-place conversion of 'asleep' to decimal hours does not write into a
# slice of the raw export (which triggers pandas' SettingWithCopyWarning).
sleep = sleep[['asleep']].copy()


# Need to convert the "asleep" column from hh:mm:ss to decimal hours
def to_hours(t):
    """Turn an ``hh:mm:ss`` string into a number of hours as a float.

    The first three colon-separated fields are read as hours, minutes and
    seconds respectively; any further fields are ignored.
    """
    fields = t.split(':')
    return float(fields[0]) + float(fields[1]) / 60.0 + float(fields[2]) / 3600.0

# Preview the first rows of the 'asleep' column before to_hours is applied to it
sleep.head()


Unnamed: 0_level_0,asleep
toDate,Unnamed: 1_level_1
2020-12-06,7:57:00
2020-12-07,7:37:00
2020-12-08,9:02:00
2020-12-09,8:39:00
2020-12-10,8:39:00


In [667]:
# Run every 'asleep' entry through to_hours, then plot the converted column
sleep['asleep'] = sleep['asleep'].map(to_hours)
sleep.iplot(
    y='asleep',
    yTitle='Time asleep (hours)',
)

In [668]:
# Average time asleep over the recorded nights (a Series reduces along its only axis by default)
sleep_mean = sleep['asleep'].mean()

In [669]:
# I'll fill in the missing data by taking the average of the sleep data I have starting on December 6, 2020.
dates = pd.date_range(start='10/13/2020', end='12/5/2020', freq='D')
time_asleep = [sleep_mean] * len(dates)

# Build the filler frame once (it used to be constructed twice, with the first
# result thrown away) and put it ahead of the recorded data.
# NOTE: re-running this cell prepends the filler rows again.
sleep_filler = pd.DataFrame(index=dates, data=time_asleep, columns=['asleep'])
sleep = pd.concat([sleep_filler, sleep])

Unnamed: 0,asleep
2020-10-13,8.017688
2020-10-14,8.017688
2020-10-15,8.017688
2020-10-16,8.017688
2020-10-17,8.017688


In [670]:
# Plot time asleep again, now including the back-filled rows
sleep.iplot(
    y='asleep',
    yTitle='Time asleep (hours)',
)

In [671]:
# There should be 323 entries, one for each day. Resample and sum so any missing days show up as 0.
# Any 0's are from when I didn't wear the watch to bed. Replace them with the mean time asleep (sleep_mean).
sleep = sleep.resample('D').sum()
# Scope the assignment to the 'asleep' column instead of overwriting whole rows.
sleep.loc[sleep['asleep'] == 0.0, 'asleep'] = sleep_mean

In [680]:
# Putting it all together: drop timezone information from each frame's index,
# then place the frames side by side (concat aligns them on their index).
df = pd.concat(
    [frame.tz_localize(None) for frame in (weather, resting_hr, sleep)],
    axis=1,
)

In [686]:
# Rename columns to something more descriptive.
# rename() returns a new frame, so bind it back to df; otherwise the new names
# only appear in this cell's output and df itself keeps the old ones.
df = df.rename(columns={"TMAX": "Temperature High (\u00B0F)", "Value": "Resting HR (BPM)", "asleep": "Time Asleep (hours)"})
df

Unnamed: 0,Temperature High (°F),Resting HR (BPM),Time Asleep (hours)
2020-10-13,65.0,65.0,8.017688
2020-10-14,62.0,63.0,8.017688
2020-10-15,63.0,64.0,8.017688
2020-10-16,70.0,55.0,8.017688
2020-10-17,69.0,55.0,8.017688
...,...,...,...
2021-08-28,88.0,49.0,7.433333
2021-08-29,90.0,49.0,8.916667
2021-08-30,72.0,49.0,7.533333
2021-08-31,72.0,50.0,8.866667
