In [1]:
import pandas as pd
import numpy as np
import os
from scipy import stats

In [2]:
data_dir = "Data"
dailyActivity = pd.read_csv(os.path.join(data_dir,"dailyActivity_merged.csv"))
weightLog = pd.read_csv(os.path.join(data_dir,"weightLogInfo_merged.csv"))

In [3]:
dailyActivity.head()

Unnamed: 0,Id,ActivityDate,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories
0,1503960366,4/12/2016,13162,8.5,8.5,0.0,1.88,0.55,6.06,0.0,25,13,328,728,1985
1,1503960366,4/13/2016,10735,6.97,6.97,0.0,1.57,0.69,4.71,0.0,21,19,217,776,1797
2,1503960366,4/14/2016,10460,6.74,6.74,0.0,2.44,0.4,3.91,0.0,30,11,181,1218,1776
3,1503960366,4/15/2016,9762,6.28,6.28,0.0,2.14,1.26,2.83,0.0,29,34,209,726,1745
4,1503960366,4/16/2016,12669,8.16,8.16,0.0,2.71,0.41,5.04,0.0,36,10,221,773,1863


In [4]:
#weightLog.head()
weightLog[weightLog.Id==1503960366]

Unnamed: 0,Id,Date,WeightKg,WeightPounds,Fat,BMI,IsManualReport,LogId
0,1503960366,5/2/2016 11:59:59 PM,52.599998,115.963147,22.0,22.65,True,1462233599000
1,1503960366,5/3/2016 11:59:59 PM,52.599998,115.963147,,22.65,True,1462319999000


In [5]:
# make dictionary mapping Ids to a tuple containing average BMI and weight
weightIds = weightLog.Id.unique()
weight_dict = dict()
for Id in weightIds:
    Id_info = weightLog[weightLog.Id==Id]
    mean_bmi = np.mean(Id_info['BMI'])
    mean_weightKg = np.mean(Id_info['WeightKg'])
    weight_dict[Id] = (mean_bmi,mean_weightKg)

In [6]:
weight_dict

{1503960366: (22.6499996185303, 52.5999984741211),
 1927972279: (47.5400009155273, 133.5),
 2873212765: (21.57000064849855, 57.0),
 4319703577: (27.41499996185305, 72.35000228881836),
 4558609924: (27.213999938964843, 69.63999938964844),
 5577150313: (28.0, 90.6999969482422),
 6962181067: (24.027999750773112, 61.553333791097),
 8877689391: (25.48708335558574, 85.14583428700765)}

In [7]:
# add weight dict information to dailyActivity
dailyAct_weight = dailyActivity.copy()
dailyAct_weight.head()

Unnamed: 0,Id,ActivityDate,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories
0,1503960366,4/12/2016,13162,8.5,8.5,0.0,1.88,0.55,6.06,0.0,25,13,328,728,1985
1,1503960366,4/13/2016,10735,6.97,6.97,0.0,1.57,0.69,4.71,0.0,21,19,217,776,1797
2,1503960366,4/14/2016,10460,6.74,6.74,0.0,2.44,0.4,3.91,0.0,30,11,181,1218,1776
3,1503960366,4/15/2016,9762,6.28,6.28,0.0,2.14,1.26,2.83,0.0,29,34,209,726,1745
4,1503960366,4/16/2016,12669,8.16,8.16,0.0,2.71,0.41,5.04,0.0,36,10,221,773,1863


In [8]:
dailyAct_weight = dailyAct_weight.reindex(columns = dailyAct_weight.columns.tolist() +
                                                        ["BMI", "WeightKg"])

dailyAct_weight.head()

Unnamed: 0,Id,ActivityDate,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories,BMI,WeightKg
0,1503960366,4/12/2016,13162,8.5,8.5,0.0,1.88,0.55,6.06,0.0,25,13,328,728,1985,,
1,1503960366,4/13/2016,10735,6.97,6.97,0.0,1.57,0.69,4.71,0.0,21,19,217,776,1797,,
2,1503960366,4/14/2016,10460,6.74,6.74,0.0,2.44,0.4,3.91,0.0,30,11,181,1218,1776,,
3,1503960366,4/15/2016,9762,6.28,6.28,0.0,2.14,1.26,2.83,0.0,29,34,209,726,1745,,
4,1503960366,4/16/2016,12669,8.16,8.16,0.0,2.71,0.41,5.04,0.0,36,10,221,773,1863,,


In [9]:
for idx, row in dailyAct_weight.iterrows():
    Id = dailyAct_weight['Id'][idx]
    try:
        dailyAct_weight['BMI'] = weight_dict[Id][0]
        dailyAct_weight['WeightKg'] = weight_dict[Id][1]
    except KeyError:
        pass

dailyAct_weight.head()

Unnamed: 0,Id,ActivityDate,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories,BMI,WeightKg
0,1503960366,4/12/2016,13162,8.5,8.5,0.0,1.88,0.55,6.06,0.0,25,13,328,728,1985,25.487083,85.145834
1,1503960366,4/13/2016,10735,6.97,6.97,0.0,1.57,0.69,4.71,0.0,21,19,217,776,1797,25.487083,85.145834
2,1503960366,4/14/2016,10460,6.74,6.74,0.0,2.44,0.4,3.91,0.0,30,11,181,1218,1776,25.487083,85.145834
3,1503960366,4/15/2016,9762,6.28,6.28,0.0,2.14,1.26,2.83,0.0,29,34,209,726,1745,25.487083,85.145834
4,1503960366,4/16/2016,12669,8.16,8.16,0.0,2.71,0.41,5.04,0.0,36,10,221,773,1863,25.487083,85.145834


In [56]:
# save new dataframe to .csv
processed_data_dir = "ProcessedData"
dailyAct_weight.to_csv(os.path.join(processed_data_dir,"dailyActivity_weight_merged.csv"))

In [25]:
# calculate each new dataframe of individual means
individuals = dailyAct_weight['Id'].unique()
indiv_dict = dict()
for person in individuals:
    indiv_dict[person] = dailyAct_weight[dailyAct_weight['Id']==person].iloc[:,2:].mean(axis=0)
indiv_dailyAct_weight = pd.DataFrame.from_dict(indiv_dict).T
indiv_dailyAct_weight.index.name='Id'
indiv_dailyAct_weight.head()

Unnamed: 0_level_0,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories,BMI,WeightKg
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1503960366,12116.741935,7.809677,7.809677,0.0,2.858387,0.794194,4.152903,0.0,38.709677,19.16129,219.935484,848.16129,1816.419355,25.487083,85.145834
1624580081,5743.903226,3.914839,3.914839,0.0,0.939355,0.360645,2.606774,0.006129,8.677419,5.806452,153.483871,1257.741935,1483.354839,25.487083,85.145834
1644430081,7282.966667,5.295333,5.295333,0.0,0.73,0.951,3.609,0.004,9.566667,21.366667,178.466667,1161.866667,2811.3,25.487083,85.145834
1844505072,2580.064516,1.706129,1.706129,0.0,0.008387,0.049032,1.647419,0.0,0.129032,1.290323,115.451613,1206.612903,1573.483871,25.487083,85.145834
1927972279,916.129032,0.634516,0.634516,0.0,0.095806,0.03129,0.507097,0.0,1.322581,0.774194,38.580645,1317.419355,2172.806452,25.487083,85.145834


In [27]:
processed_data_dir = "ProcessedData"
indiv_dailyAct_weight.to_csv(os.path.join(processed_data_dir,"indiv_dailyActivity_weight_merged.csv"))