In [16]:
import pandas as pd

In [142]:
class Preprocess:
    """Load the two daily-review CSV exports and give them one shared schema.

    On construction both files are read, indexed by a sorted ``date``
    datetime index, their exercise flags converted to booleans, and their
    columns renamed and reordered so that ``rev_21`` and ``rev_22`` expose
    exactly the same column names in the same order.

    Attributes:
        rev_21: DataFrame built from the 2021 export.
        rev_22: DataFrame built from the 2022-23 export.
    """

    DEFAULT_REV_21_PATH = '../data/Daily-Review-2021.csv'
    DEFAULT_REV_22_PATH = '../data/Daily-Review - 2022-23.csv'

    # Source column -> unified column name. The insertion order of each map
    # is the column order of the aligned frame, so both maps must list their
    # target names in the same order.
    REV_21_COL_MAP = {
        '📛 Bad Habit Tracking': 'day_quality',
        'Productivity': 'productivity',
        'Exercise': 'exercise',
        'Wake Up Time': 'wake_at',
        'Sleep Time': 'sleep_at',
        'Nap Hours': 'nap_hours',
        'Total Sleep Hours': 'sleep_hours_total',
        'Eating Quality (1 - poor, 5 - good)': 'eating_quality',
        'Finished Todos (Fixed Number)': 'tasks_completed',
        'Total Todos': 'tasks_planned',
        'Weight': 'weight',
        'Effectiveness Ref': 'effectiveness',
        '💰 Money Spend': 'expense',
        'Sleep Quality': 'sleep_quality',
    }

    REV_22_COL_MAP = {
        'Day Quality': 'day_quality',
        'Productivity': 'productivity',
        'Exercised': 'exercise',
        'Wake At': 'wake_at',
        'Sleep At': 'sleep_at',
        'Nap Hours': 'nap_hours',
        'Sleep Hours': 'sleep_hours_total',
        # NOTE(review): the 2021 counterpart is named as a 1-5 quality score;
        # 'Calories Intake' looks like a different measure -- confirm the two
        # are really comparable before analysing 'eating_quality' jointly.
        'Calories Intake': 'eating_quality',
        'Finished Todos (Actual)': 'tasks_completed',
        'Total Todos': 'tasks_planned',
        'Weight': 'weight',
        'Effectiveness': 'effectiveness',
        'Money Spend': 'expense',
        'Sleep Quality': 'sleep_quality',
    }

    def __init__(self, rev_21_path=None, rev_22_path=None):
        """Read both exports and run every preprocessing step.

        Args:
            rev_21_path: Path of the 2021 CSV export. Defaults to
                ``DEFAULT_REV_21_PATH``.
            rev_22_path: Path of the 2022-23 CSV export. Defaults to
                ``DEFAULT_REV_22_PATH``.

        Raises:
            KeyError: If an export lacks a column the pipeline needs.
        """
        if rev_21_path is None:
            rev_21_path = self.DEFAULT_REV_21_PATH
        if rev_22_path is None:
            rev_22_path = self.DEFAULT_REV_22_PATH

        self.rev_21 = pd.read_csv(rev_21_path)
        self.rev_22 = pd.read_csv(rev_22_path)

        self.set_datetime_index()
        self.combine_exercise_columns()
        self.align_columns()

    @staticmethod
    def _with_datetime_index(frame, date_col):
        """Return ``frame`` indexed by ``date_col`` parsed as datetimes, sorted ascending."""
        return (
            frame
            .rename(columns={date_col: 'date'})
            .assign(date=lambda renamed: pd.to_datetime(renamed['date']))
            .set_index('date')
            .sort_index()
        )

    @staticmethod
    def _is_yes(series):
        """Return a boolean Series that is True only where the value is the string 'Yes'."""
        return series.eq('Yes')

    @staticmethod
    def _select_and_rename(frame, col_map):
        """Keep only the keys of ``col_map`` (in map order) and rename them to its values.

        Selecting by label raises ``KeyError`` if a mapped column is absent.
        """
        return frame.loc[:, list(col_map)].rename(columns=col_map)

    def set_datetime_index(self):
        """Index both frames by a sorted ``date`` datetime index.

        The date lives in the 'Day' column of the 2021 export and in the
        'Date' column of the 2022-23 export.
        """
        self.rev_21 = self._with_datetime_index(self.rev_21, 'Day')
        self.rev_22 = self._with_datetime_index(self.rev_22, 'Date')

    def combine_exercise_columns(self):
        """Turn the 'Yes'-style exercise columns into booleans.

        The 2021 frame gets a new 'Exercise' column that is True when either
        'Morning Exercise' or 'Night Exercise' is 'Yes'; the 2022-23 frame's
        single 'Exercised' column is converted in place. Anything other than
        the exact string 'Yes' (including missing values) becomes False.
        """
        morning = self._is_yes(self.rev_21['Morning Exercise'])
        night = self._is_yes(self.rev_21['Night Exercise'])
        self.rev_21 = self.rev_21.assign(**{
            'Morning Exercise': morning,
            'Night Exercise': night,
            'Exercise': morning | night,
        })

        self.rev_22 = self.rev_22.assign(
            Exercised=self._is_yes(self.rev_22['Exercised'])
        )

    def align_columns(self):
        """Rename and reorder both frames to the shared schema.

        Only the columns named in ``REV_21_COL_MAP`` / ``REV_22_COL_MAP`` are
        kept; every other column is discarded by the selection itself, so an
        export that lacks one of the discarded columns no longer fails.

        Raises:
            KeyError: If a mapped source column is missing from its frame.
        """
        self.rev_21 = self._select_and_rename(self.rev_21, self.REV_21_COL_MAP)
        self.rev_22 = self._select_and_rename(self.rev_22, self.REV_22_COL_MAP)

    def decode_day_quality(self):
        """Placeholder: not implemented yet."""
        # TODO: Decode the day quality symbol
        pass

    def encode_productivity(self):
        """Placeholder: not implemented yet."""
        # TODO: Encode productivity column with number from -2 to 2
        pass

    def decode_wake_and_sleep_time(self):
        """Placeholder: not implemented yet."""
        # TODO: decode wake_at and sleep_at time
        pass

In [143]:
# Build the preprocessor: __init__ reads both CSV exports and runs the
# datetime-index, exercise-flag and column-alignment steps.
prep = Preprocess()

In [149]:
# Preview the first rows of the aligned 2021 frame.
prep.rev_21.head()

Unnamed: 0_level_0,day_quality,productivity,exercise,wake_at,sleep_at,nap_hours,sleep_hours_total,eating_quality,tasks_completed,tasks_planned,weight,effectiveness,expense,sleep_quality
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-02-20,⚪️⚪️⚪️🟤🟣⚪️⚪️⚪️,🧭 Reset Day,True,8.0,12.5,0.75,8.25,4.0,,,,,100.0,
2021-02-21,⚪️⚪️⚪️🟤🟣⚪️⚪️⚪️,🧭 Reset Day,True,8.0,1.0,1.0,8.0,4.0,,,,,76.9,
2021-02-22,⚪️⚪️⚪️⚪️🟣⚫️⚪️⚪️,❌ Not Productive,True,6.5,1.6,0.85,5.75,4.0,,,,,25.0,
2021-02-23,⚪️⚪️⚪️⚪️⚪️⚪️⚪️⚪️,💣 Burnout Day,True,6.6,0.5,,6.1,4.0,,1.0,,0.0,46.0,
2021-02-24,⚪️⚪️🟢 ⚪️🟣⚪️⚪️⚪️,❌ Not Productive,True,6.5,1.0,1.0,6.5,2.0,,,,,,


In [150]:
# Preview the first rows of the aligned 2022-23 frame (same columns as rev_21).
prep.rev_22.head()

Unnamed: 0_level_0,day_quality,productivity,exercise,wake_at,sleep_at,nap_hours,sleep_hours_total,eating_quality,tasks_completed,tasks_planned,weight,effectiveness,expense,sleep_quality
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-10-01,⚪️⚪️🟤⚪️⚫️⚫️⚪️,Productive,True,7.5,3.0,1.0,5.5,,8.0,10.0,114.1,0.8,6457.6,Good
2021-10-02,⚪️⚪️🟤⚪️⚫️⚪️⚪️,Productive,True,6.75,1.5,1.0,6.25,,9.0,11.0,114.5,0.818182,70.8,Good
2021-10-03,🔵🔵⚪️⚪️⚫️⚫️⚪️,Productive,True,9.0,3.0,0.0,6.0,,9.0,11.0,114.2,0.818182,30.2,
2021-10-04,⚪️⚪️⚪️🟣⚫️⚪️⚪️,Productive,True,5.0,1.5,2.0,5.5,,7.0,9.0,114.4,0.777778,131.8,Bad
2021-10-05,⚪️⚪️⚪️⚪️⚫️⚪️🔴,Productive,False,7.5,2.0,0.0,5.5,,4.0,7.0,114.5,0.571429,567.0,
