In [1]:
import calendar
from itertools import chain
from datetime import datetime
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
####################################################
#xls_files = glob.glob('FitbitData/*.xls')
#activities = [pd.ExcelFile(xls).parse('Activities', index_col=0, parse_dates=True) for xls in xls_files]
#sleep = [pd.ExcelFile(xls).parse('Sleep', usecols=':F') for xls in xls_files]
#sleep = [pd.ExcelFile(xls).parse('Sleep', usecols=':F', index_col=[0,1], parse_dates=True) for xls in xls_files]
#sleep = [pd.ExcelFile(xls).parse('Sleep', usecols=':F', index_col=0, parse_dates=True) for xls in xls_files]
####################################################

# -- STORE ALL FITBIT DATA INTO LISTS OF SHEET --
xls_files = glob.glob('Fitbit/*.xls')

## Functions

In [3]:
def add_weekday(df, not_index=0):
    """Add day of week column"""
    
    # -- USE DATAFRAME'S INDEX IF 'not_index' = 1, OTHERWISE USE DATAFRAME'S 'Date' COLUMN --
    col = df.index
    if not_index:
        col = df['Date']
    
    # -- DETERMINE THE DAY OF WEEK FROM THE DATE AND STORE IN A LIST (USED TO ADD AS A COLUMN TO THE DATAFRAME) --
    weekday = []
    for date in col:
        #weekday.append(datetime.strptime(str(date), '%Y-%m-%d').strftime('%A'))
        weekday.append(calendar.day_name[date.weekday()])
    df.loc[:, 'Weekday'] = weekday

### Function for wrangling sleep data

In [4]:
def sort_sleep_data(list_of_df):
    """Sort sleep data by date"""
    
    for i in range(len(list_of_df)):
        list_of_df[i].sort_values(by='Start Time', ascending=True, inplace=True)

### Functions for wrangling food and macros data

In [5]:
def flatten_list(l):
    """Flatten a list"""
    
    return list(chain(*l))


def get_sheetnames_and_dates(excel_file):
    """Only get sheet names and dates for days where food data is entered"""
    """Returns: 2 lists (sheet names & dates)"""
    
    monthly_calories = pd.ExcelFile(excel_file).parse('Foods')

    dates = []
    sheet_names = []
    
    # -- GET SHEET NAMES AND DATES ONLY FOR THE DAYS WHERE INFO ABOUT FOOD INTAKE IS ENTERED --
        # -- daily[0]: date --
        # -- daily[1]: calories --
    for daily in monthly_calories.values:
        if str(daily[1]) != '0':
            dates.append(datetime.strptime(daily[0], '%Y-%m-%d').date())
            foodLogSheetName = 'Food Log ' + daily[0].replace('-', '')
            sheet_names.append(foodLogSheetName)
    return sheet_names, dates


def remove_empty_rows(xls, sheet_list):
    """Converting data in labeled table format to dataframe format"""
    """Returns: dataframe without unnecessary data"""
    
    df_list = [pd.read_excel(xls, sheet_name=sheet) for sheet in sheet_list]

    # -- REMOVING UNNECESSARY ROWS & FILLING IN 'MISSING' DATA --
    for df in df_list:
        df.dropna(how='all', inplace=True)
        df['Meal'] = df['Meal'].fillna(method='ffill')
        df.reset_index(drop=True, inplace=True)
        
    return df_list


def get_food_and_macros(food_list, dates_list):
    """Get macros information into desirable dataset format"""
    """Returns: 2 dataframes (one with food consumption data & one with daily macros info)"""

    macros_df = pd.DataFrame()
    all_food_df = pd.DataFrame()
    all_macros_df = pd.DataFrame()
    
    # -- SPLITTING DATA INTO TWO DIFFERENT DATAFRAMES --
    for ind, (df, date) in enumerate(zip(food_list, dates_list)):
        
        index_of_macros = []

        # -- STORE INDEX OF ROWS THAT HAVE MACROS DATA --
        for i in df.index:
            if df['Meal'][i] == 'Daily Totals':
                index_of_macros.append(i)

        # -- EXTRACT MACROS DATA FROM THE DATAFRAME. CREATE & STORE MACROS DATA IN A NEW DATAFRAME --
        macros_df = df.iloc[index_of_macros[1]:index_of_macros[-1]+1]
        macros_df['Date'] = date
        macros_df = macros_df.pivot(index='Date', columns='Food', values='Calories')
        all_macros_df = all_macros_df.append(macros_df)
        
        #food_list[ind].drop(index=index_of_macros)
        #food_list[ind].dropna(inplace=True)
        #food_list[ind].reset_index(drop=True, inplace=True)

        # -- ADD 'Date' COLUMN TO THE FOOD DATAFRAME --
        food_list[ind]['Date'] = date

        # -- STORE EACH UPDATED FOOD DATAFRAME INTO A NEW DATAFRAME --
        all_food_df = all_food_df.append(food_list[ind].drop(index=index_of_macros).dropna())

    return all_food_df, all_macros_df

## Data wrangling

### Cleaning activities & sleep data

In [6]:
# -- STORING DATA IN EXCEL AS LISTS DATAFRAMES --
list_of_activities_df = [pd.ExcelFile(xls).parse('Activities', index_col=0, parse_dates=True) for xls in xls_files]
list_of_sleep_df = [pd.ExcelFile(xls).parse('Sleep', usecols=':F') for xls in xls_files]

# -- CONCATING ALL FOOD DATAFRAMES INTO ONE BIG DATAFRAME --
activities = pd.concat(list_of_activities_df)
add_weekday(activities)

# -- CONCATING ALL SLEEP DATAFRAMES INTO ONE BIG DATAFRAME --
sort_sleep_data(list_of_sleep_df)
sleep = pd.concat(list_of_sleep_df)
sleep.reset_index(drop=True, inplace=True)

### Cleaning food data

In [7]:
# -- GET ALL NECESSARY SHEET NAMES AND DATES --
sheet_names, dates = zip(*[get_sheetnames_and_dates(xls) for xls in xls_files])
dates = [d for d in dates if d]

# -- FIRST STEP IN DATA CLEANING: REMOVING ALL EMPTY ROWS --
list_of_food_df = [remove_empty_rows(xls, monthly_sheets) for xls, monthly_sheets in zip(xls_files, sheet_names) if monthly_sheets]

# -- SECOND STEP IN DATA CLEANING: SPLIT ORIGINAL DATAFRAMES INTO SEPARATE DATAFRAMES --
list_of_food_df, list_of_macros_df = zip(*[get_food_and_macros(monthly_food, date) for monthly_food, date in zip(list_of_food_df, dates)])

macros = pd.concat(list_of_macros_df)
food = pd.concat(list_of_food_df)
food.reset_index(drop=True, inplace=True)

add_weekday(macros)
add_weekday(food, 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [8]:
activities

Unnamed: 0_level_0,Calories Burned,Steps,Distance,Floors,Minutes Sedentary,Minutes Lightly Active,Minutes Fairly Active,Minutes Very Active,Activity Calories,Weekday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-01,1949,11458,5.26,26,610,209,3,38,907,Friday
2016-01-02,2283,14675,5.89,18,581,323,12,53,1352,Saturday
2016-01-03,2007,12694,5.40,48,469,280,9,21,1018,Sunday
2016-01-04,2499,17375,6.94,48,381,411,17,62,1651,Monday
2016-01-05,2413,17631,7.06,59,468,452,7,38,1566,Tuesday
2016-01-06,2331,15018,6.00,21,531,325,29,66,1423,Wednesday
2016-01-07,2391,14732,5.87,46,544,319,16,77,1473,Thursday
2016-01-08,2660,20864,8.33,21,510,451,47,62,1873,Friday
2016-01-09,2286,16101,6.42,18,433,324,23,44,1366,Saturday
2016-01-10,1599,4972,2.01,19,534,194,0,0,511,Sunday


In [9]:
sleep

Unnamed: 0,Start Time,End Time,Minutes Asleep,Minutes Awake,Number of Awakenings,Time in Bed
0,2015-12-31 8:44PM,2016-01-01 7:06AM,597,25,1,622
1,2016-01-01 9:26PM,2016-01-02 5:59AM,488,24,2,513
2,2016-01-02 10:08PM,2016-01-03 5:57AM,456,13,1,469
3,2016-01-03 10:27PM,2016-01-04 5:06AM,368,31,1,399
4,2016-01-03 6:15PM,2016-01-03 9:46PM,199,12,1,211
5,2016-01-04 7:37PM,2016-01-05 4:54AM,536,20,2,556
6,2016-01-05 8:59PM,2016-01-06 5:16AM,461,36,3,497
7,2016-01-06 9:07PM,2016-01-07 5:03AM,465,11,2,476
8,2016-01-07 8:59PM,2016-01-08 4:48AM,442,27,2,469
9,2016-01-08 10:38PM,2016-01-09 6:37AM,446,33,2,479


In [10]:
food

Unnamed: 0,Meal,Food,Calories,Date,Weekday
0,Breakfast,Canadian Bacon,23,2016-01-01,Friday
1,Breakfast,American Cheese,79,2016-01-01,Friday
2,Breakfast,Grapefruit,60,2016-01-01,Friday
3,Breakfast,"English Muffin, Original",135,2016-01-01,Friday
4,Breakfast,"Egg, Chicken, Fried",184,2016-01-01,Friday
5,Morning Snack,Peppermint Patties,280,2016-01-01,Friday
6,Breakfast,American Cheese,79,2016-01-04,Monday
7,Breakfast,Canadian Bacon,70,2016-01-04,Monday
8,Breakfast,"English Muffin, Original",135,2016-01-04,Monday
9,Breakfast,"Egg, Chicken, Fried",184,2016-01-04,Monday


In [11]:
macros

Food,Calories,Carbs,Fat,Fiber,Protein,Sodium,Water,Weekday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-01-01,761,100 g,26 g,7 g,26 g,"1,029 mg",0 fl oz,Friday
2016-01-04,968,94 g,44 g,6 g,33 g,"1,086 mg",0 fl oz,Monday
2016-01-07,1151,178 g,32 g,11 g,33 g,"1,163 mg",0 fl oz,Thursday
2016-01-08,868,115 g,27 g,12 g,31 g,"1,256 mg",0 fl oz,Friday
2016-01-09,444,31 g,19 g,9 g,32 g,607 mg,0 fl oz,Saturday
2016-01-11,738,73 g,34 g,5 g,28 g,"1,046 mg",0 fl oz,Monday
2016-01-12,528,44 g,22 g,7 g,26 g,"1,029 mg",0 fl oz,Tuesday
2016-01-13,540,55 g,30 g,7 g,18 g,347 mg,0 fl oz,Wednesday
2016-01-14,738,49 g,41 g,10 g,38 g,"1,399 mg",0 fl oz,Thursday
2016-01-15,1013,91 g,48 g,17 g,56 g,"1,552 mg",0 fl oz,Friday
