## Description

- This notebook visualises hourly activity data (intensity, steps and calories) and compares the hourly patterns across active groups

In [1]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.ticker import (MultipleLocator, MaxNLocator)

## Load Data

In [2]:
# Read the hourly activity table; the first row of the CSV supplies the column names.
hourly_df = pd.read_csv('hourly_df.csv', header=0)
# Preview the first five rows.
hourly_df.head()

Unnamed: 0,Id,ActivityHour,TotalIntensity,AverageIntensity,StepTotal,Calories,ActiveGroup
0,1503960366,4/12/2016 12:00:00 AM,20,0.333333,373,81,Active
1,1503960366,4/12/2016 1:00:00 AM,8,0.133333,160,61,Active
2,1503960366,4/12/2016 2:00:00 AM,7,0.116667,151,59,Active
3,1503960366,4/12/2016 3:00:00 AM,0,0.0,0,47,Active
4,1503960366,4/12/2016 4:00:00 AM,0,0.0,0,48,Active


## Calculate Hourly Averages

- We want to observe whether there are any differences in the hourly behaviour of users in different active groups

In [3]:
# Parse the timestamp strings with an explicit format instead of relying on
# inference: the preview rows look like "4/12/2016 1:00:00 AM", i.e.
# month/day/year with a 12-hour clock. A row that does not match raises
# instead of being silently mis-parsed.
# NOTE(review): format taken from the five preview rows - confirm it holds for the whole file.
hourly_df['ActivityHour'] = pd.to_datetime(
    hourly_df['ActivityHour'],
    format='%m/%d/%Y %I:%M:%S %p',
)

In [4]:
# Derive an "Hour" column (hour of day taken from ActivityHour) so that rows
# can be averaged per hour of day further down.
hourly_df['Hour'] = hourly_df.ActivityHour.dt.hour

In [5]:
# Mean intensity, steps and calories for every (user, hour of day, active group)
# combination, flattened back into ordinary columns.
hourly_df_avg = (
    hourly_df
    .groupby(['Id', 'Hour', 'ActiveGroup'])[['AverageIntensity', 'StepTotal', 'Calories']]
    .mean()
    .reset_index()
)
hourly_df_avg.head()

Unnamed: 0,Id,Hour,ActiveGroup,AverageIntensity,StepTotal,Calories
0,1503960366,0,Active,0.123333,142.666667,60.966667
1,1503960366,1,Active,0.058333,50.1,53.866667
2,1503960366,2,Active,0.029444,29.0,50.266667
3,1503960366,3,Active,0.015556,11.833333,48.633333
4,1503960366,4,Active,0.005556,3.6,47.7


## Plots by Attributes

In [6]:
def plot_by_att(df,x,y,group,filt_group=''):
    """Draw a line plot of column ``y`` against column ``x``, coloured by ``group``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot. Must contain the columns named by ``x``, ``y`` and
        ``group``, plus ``'ActiveGroup'`` when ``filt_group`` is given.
    x, y : str
        Names of the columns used for the horizontal and vertical axes.
    group : str
        Name of the column passed to seaborn as ``hue`` (one colour per value).
    filt_group : str, optional
        When set, only rows whose ``'ActiveGroup'`` equals this value are
        plotted. The default ``''`` (or ``None``) plots every row.

    Returns
    -------
    None
        The figure is created as a side effect and rendered by the notebook.
    """
    # '' and None both mean "no filter"; previously None was compared against
    # the column and silently produced an empty plot.
    if filt_group is not None and filt_group != '':
        plot_df = df.loc[df['ActiveGroup'] == filt_group]
    else:
        plot_df = df

    fig, ax = plt.subplots(figsize=(15, 5))
    # Draw on the axes created above explicitly rather than on whatever axes
    # pyplot currently considers active.
    sns.lineplot(data=plot_df, x=x, y=y, hue=group, palette='Set2', ax=ax)
    # Swap the default x tick locator for MaxNLocator with its default settings.
    ax.xaxis.set_major_locator(MaxNLocator())
    

### Plot Avg Calories Burned for One Week
- Observe the daily seasonality of the data (the pattern that repeats every 24 hours)
- Very Active Users are consistently burning more calories over the week

In [None]:
# Plot one week of hourly rows: everything strictly before 2016-04-19 00:00.
# The strict "<" keeps the window at whole days; "<=" also pulled in the
# midnight reading of 4/19.
# NOTE(review): assumes the data starts on 4/12/2016, as the preview rows suggest - confirm.
plot_by_att(
    hourly_df.loc[hourly_df['ActivityHour'] < pd.Timestamp('2016-04-19')],
    'ActivityHour',
    'Calories',
    'ActiveGroup',
    '',
)

### Plot Average Intensity by Hour
- Intensities are based on intensity scores from Fitbit

In [None]:
# Average intensity per hour of day, one line per active group (no group filter).
plot_by_att(hourly_df_avg, x='Hour', y='AverageIntensity', group='ActiveGroup', filt_group='')


### Plot Average Calories Burned by Hour

In [None]:
# Average calories per hour of day, one line per active group (no group filter).
plot_by_att(hourly_df_avg, x='Hour', y='Calories', group='ActiveGroup', filt_group='')


### Plot Average Step Count by Hour

In [None]:
# Average step count per hour of day, one line per active group.
# The group filter is left at its default.
plot_by_att(hourly_df_avg, x='Hour', y='StepTotal', group='ActiveGroup')