In [None]:
import numpy as np
import pandas as pd
import dcarte
import os
import math
import seaborn as sns
import warnings
from numpy import log2
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this also hides pandas/numpy warnings raised further down
# (e.g. log2 of zero probabilities) - confirm that hiding them is intended.
warnings.filterwarnings('ignore')
# Bare last expression of the cell: whatever dcarte.domains() returns is
# rendered as the cell output.
# presumably a listing of the datasets available through dcarte - TODO confirm
dcarte.domains()

In [None]:
# Analysis configuration; both flags are passed to the selection helpers in the
# pipeline cell (select_daytime_night(..., time), select_time_step(..., timestep)).
time = 0  # part of the day to analyse - daytime: 0 (6:00-18:00), night: 1 (18:00-6:00)
timestep = 0  # aggregation step - per day: 0, per week: 1 (select_time_step also accepts per hour: 2, accurate time: 3)
# Load the two activity sources; they are cleaned separately and concatenated later.
# presumably 'RAW' and 'LEGACY' are dcarte domain names - TODO confirm against dcarte.domains()
activity_raw = dcarte.load('Activity','RAW')
activity_legacy = dcarte.load('Motion','LEGACY')

In [None]:
def pre_process_raw(activity_raw):
    """Clean the RAW activity table and keep only the rooms used in the analysis.

    Steps:
      1. translate the raw sensor labels to canonical room names;
      2. keep only bathroom / bedroom / hallway / lounge / kitchen events;
      3. cast ``location_name`` and ``patient_id`` to strings;
      4. drop the ``home_id``, ``location_id`` and ``source`` columns.

    Labels that are not in the mapping (and missing labels) are discarded.
    Previously they survived the room filter as NaN and were then turned into
    the literal string ``'nan'``, which leaked a bogus room into later steps.

    Parameters
    ----------
    activity_raw : pandas.DataFrame
        Must contain ``location_name``, ``patient_id``, ``home_id``,
        ``location_id`` and ``source``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the remaining columns, original row index preserved.

    Raises
    ------
    KeyError
        If ``home_id``, ``location_id`` or ``source`` is missing.
    """
    # raw sensor label -> canonical room name
    mapping = {
        'conservatory': 'conservatory',
        'WC1': 'wc',
        'corridor1': 'corridor',
        'living room': 'living',
        'study': 'study',
        'dining room': 'dining',
        'bathroom1': 'bathroom',
        'bedroom1': 'bedroom',
        'hallway': 'hallway',
        'lounge': 'lounge',
        'kitchen': 'kitchen',
        'cellar': 'cellar',
        'office': 'office',
    }
    # The mapped rooms that the original exclusion list did not remove.
    kept_rooms = ['bathroom', 'bedroom', 'hallway', 'lounge', 'kitchen']

    activity = activity_raw.copy()
    activity['location_name'] = activity['location_name'].map(mapping)

    # Whitelist instead of blacklist: unmapped labels are NaN here and must go.
    # .copy() so the assignments below act on an independent frame, not a slice.
    activity = activity[activity['location_name'].isin(kept_rooms)].copy()
    activity['location_name'] = activity['location_name'].values.astype('str')
    activity['patient_id'] = activity['patient_id'].values.astype('str')

    # delete redundant columns
    return activity.drop(['home_id', 'location_id', 'source'], axis=1)

In [None]:
def pre_process_legacy(activity_raw):
    """Clean the LEGACY motion table and keep only the rooms used in the analysis.

    Steps:
      1. translate the legacy labels to canonical room names;
      2. keep only hallway / kitchen / bathroom / lounge / bedroom events;
      3. cast ``location_name`` and ``patient_id`` to strings;
      4. return only ``start_date``, ``patient_id`` and ``location_name``.

    Labels that are not in the mapping (and missing labels) are discarded.
    Previously they survived the room filter as NaN and were then turned into
    the literal string ``'nan'``.

    Parameters
    ----------
    activity_raw : pandas.DataFrame
        Legacy table with at least ``start_date``, ``patient_id`` and
        ``location_name``. It is not modified. Any other column (such as
        ``index`` or ``timezone``) is simply left out of the result, so those
        columns no longer have to be present.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``['start_date', 'patient_id', 'location_name']``.
    """
    # legacy label -> canonical room name
    mapping = {
        'Hallway': 'hallway',
        'Kitchen': 'kitchen',
        'Study': 'study',
        'Bathroom': 'bathroom',
        'Lounge': 'lounge',
        'Bedroom': 'bedroom',
        'Living Room': 'living',
        'Front Door': 'door',
        'D': 'd',
        'Dining Room': 'dining',
    }
    # The mapped rooms that the original exclusion list did not remove.
    kept_rooms = ['hallway', 'kitchen', 'bathroom', 'lounge', 'bedroom']

    activity = activity_raw.copy()
    activity['location_name'] = activity['location_name'].map(mapping)

    # Whitelist instead of blacklist: unmapped labels are NaN here and must go.
    # .copy() so the assignments below act on an independent frame, not a slice.
    activity = activity[activity['location_name'].isin(kept_rooms)].copy()
    activity['location_name'] = activity['location_name'].values.astype('str')
    activity['patient_id'] = activity['patient_id'].values.astype('str')

    # Selecting the three needed columns makes an explicit drop unnecessary.
    return activity[['start_date', 'patient_id', 'location_name']]

In [None]:
def select_daytime_night(my_activity, my_time):
    """Keep only daytime or only night events and tag each with its ``day_date``.

    Parameters
    ----------
    my_activity : pandas.DataFrame
        Must contain a datetime-like ``start_date`` column. The frame is left
        untouched (the previous version added an ``hour`` column to it).
    my_time : int
        0 -> daytime, events whose hour is 6..17 (06:00-18:00);
        1 -> night, every other event (18:00-06:00).

    Returns
    -------
    pandas.DataFrame
        Copy of the selected rows with an extra ``day_date`` column truncated
        to the day. For night events before 06:00 the ``day_date`` is the
        previous calendar day, so that one night (18:00-24:00 plus the
        following 00:00-06:00) shares a single date.

    Raises
    ------
    ValueError
        If ``my_time`` is neither 0 nor 1.
    """
    if my_time not in (0, 1):
        raise ValueError("Error: please input correct number! daytime:0 (6:00-18:00), night:1 (18:00-6:00)")

    # Kept as a free-standing Series so the caller's frame gains no helper column.
    hour = my_activity['start_date'].dt.hour
    in_daytime = hour.between(6, 17)

    if my_time == 0:
        print("Time: daytime")
        activity_select = my_activity[in_daytime].copy()
        activity_select['day_date'] = activity_select['start_date'].values.astype("datetime64[D]")
    else:
        print("Time: night")
        activity_select = my_activity[~in_daytime].copy()
        start = activity_select['start_date']
        # e.g. the night of 22/3 covers 18:00-24:00 on 22/3 and 00:00-06:00 on 23/3:
        # shift the early-morning events back one day *before* truncating.
        before_six = (hour[~in_daytime] < 6).to_numpy()
        night_anchor = start.mask(before_six, start - pd.Timedelta(days=1))
        activity_select['day_date'] = night_anchor.values.astype("datetime64[D]")

    return activity_select

In [None]:
def select_time_step(my_activity, my_timestep):
    """Re-express the event timestamps at the requested time step.

    Parameters
    ----------
    my_activity : pandas.DataFrame (or anything ``pd.DataFrame`` accepts)
        Needs ``patient_id`` and ``location_name``; additionally ``day_date``
        for steps 0 and 1, and ``start_date`` for steps 1, 2 and 3.
        The input is copied and never modified.
    my_timestep : int
        0 -> per day:  ``day_date`` truncated to the day; (patient, day) groups
             with 2 or fewer events are removed.
        1 -> per week: ``start_date`` is replaced by the start of the week
             containing ``day_date``; the result has exactly the columns
             ``['patient_id', 'location_name', 'start_date']`` and
             (patient, week) groups with 2 or fewer events are removed.
        2 -> per hour: ``start_date`` truncated to the hour.
        3 -> accurate time: ``start_date`` cast to nanosecond resolution.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        For any other ``my_timestep``.
    """
    # pd.DataFrame(frame) may share data with its argument; copy explicitly so the
    # column assignments below never leak into the caller's frame.
    activity = pd.DataFrame(my_activity).copy()

    if my_timestep == 0:
        print("Timestep: per day")
        activity['day_date'] = activity['day_date'].values.astype("datetime64[D]")
        activity = activity.groupby(['patient_id', 'day_date']).filter(lambda x: len(x) > 2)

    elif my_timestep == 1:
        print("Timestep: per week")
        activity['week'] = activity['day_date'].dt.to_period('W').dt.start_time
        # Rename by label, not by position: a positional rename silently swaps
        # patient_id / location_name whenever the input column order differs.
        activity = (
            activity
            .drop(['day_date', 'start_date'], axis=1)
            .rename(columns={'week': 'start_date'})
        )
        activity = activity[['patient_id', 'location_name', 'start_date']]
        activity = activity.groupby(['patient_id', 'start_date']).filter(lambda x: len(x) > 2)

    elif my_timestep == 2:
        print("Timestep: per hour")
        activity['start_date'] = activity['start_date'].values.astype("datetime64[h]")

    elif my_timestep == 3:
        print("Accurate time")
        activity['start_date'] = activity['start_date'].values.astype("datetime64[ns]")

    else:
        raise ValueError("Error: please input correct number! per day: 0, per week: 1")

    return activity

In [None]:
def get_transition_matrix(my_activity):
    """Estimate the room-to-room transition probabilities of an event sequence.

    Consecutive rows of ``my_activity`` (in their given order) are read as
    transitions ``location_name[i] -> location_name[i + 1]``. The entry
    ``[a, b]`` of the result is::

        (number of a -> b transitions) / (number of transitions leaving a)

    Parameters
    ----------
    my_activity : pandas.DataFrame
        Needs a ``location_name`` column; row order defines the sequence.

    Returns
    -------
    pandas.DataFrame
        Always a 5x5 float matrix whose rows and columns are, in this order,
        ``['bathroom', 'bedroom', 'kitchen', 'lounge', 'hallway']``. Rows of
        rooms that are never left are all zero. Fewer than two events yield
        an all-zero matrix (an empty frame used to raise ``IndexError``).

    Notes
    -----
    Transitions from or to any other label (for instance a stray ``'nan'``)
    are ignored for the cells but still count in the denominator, exactly as
    before. They no longer append extra rows/columns to the matrix, which
    would break the positional pairing with the stationary distribution.
    """
    states = ['bathroom', 'bedroom', 'kitchen', 'lounge', 'hallway']
    transition_matrix = pd.DataFrame(
        np.zeros((len(states), len(states))), index=states, columns=states
    )

    visits = my_activity['location_name'].to_numpy()
    if len(visits) < 2:
        return transition_matrix

    origins, destinations = visits[:-1], visits[1:]

    # Every event except the last one is the start of exactly one transition,
    # so counting the origins gives the per-room denominator.
    departures = pd.Series(origins).value_counts()

    pairs = pd.DataFrame({'beginning': origins, 'ending': destinations})
    pairs = pairs[pairs['beginning'].isin(states) & pairs['ending'].isin(states)]

    pair_counts = pairs.groupby(['beginning', 'ending']).size()
    for (origin, destination), count in pair_counts.items():
        transition_matrix.loc[origin, destination] = count / departures[origin]

    return transition_matrix

In [None]:
def get_stationary_distribution(my_transition_matrix):
    """Least-squares stationary distribution of a transition matrix.

    Solves, in the least-squares sense, the stacked linear system::

        (P^T - I) pi = 0
        sum(pi)      = 1

    Parameters
    ----------
    my_transition_matrix : array-like, shape (n, n)
        Transition matrix P (a DataFrame or a 2-D array).

    Returns
    -------
    pandas.DataFrame
        Shape ``(n, 1)``, single column ``0``; row ``k`` belongs to state ``k``
        of the input.

    Notes
    -----
    ``np.linalg.lstsq`` replaces the hand-built normal equations
    ``solve(A^T A, A^T b)``. The result is the same whenever the system has
    full column rank. For rank-deficient systems (e.g. a chain made of several
    closed classes) ``solve`` raised ``LinAlgError``, whereas ``lstsq`` returns
    the minimum-norm least-squares solution.
    """
    P = np.asarray(my_transition_matrix, dtype=float)
    n_states = len(P)

    # n balance equations stacked on top of the single normalisation row.
    A = np.vstack([P.T - np.identity(n_states), np.ones((1, n_states))])
    b = np.append(np.zeros(n_states), 1.0)

    solution = np.linalg.lstsq(A, b, rcond=None)[0]
    return pd.DataFrame(solution)

In [None]:
def get_entropy_week(my_activity_week, my_stationary_distribution):
    """Entropy rate of one slice of activity, weighted by a fixed stationary distribution.

    Builds the transition matrix ``T`` of ``my_activity_week`` and accumulates::

        - sum_k  pi[k] * sum_j  T[k, j] * log2(T[k, j])

    where ``pi[k]`` is row ``k`` (by position) of ``my_stationary_distribution``.

    Parameters
    ----------
    my_activity_week : pandas.DataFrame
        Events of the slice; handed unchanged to ``get_transition_matrix``.
    my_stationary_distribution : pandas.DataFrame
        One row per state, in the same order as the transition-matrix rows.

    Returns
    -------
    The running total ``0 + (-(pi.iloc[k] * row_sum)) + ...``. Because
    ``pi.iloc[k]`` is a row of the DataFrame, the result is a Series with one
    entry per column of ``my_stationary_distribution``.
    """
    slice_matrix = get_transition_matrix(my_activity_week)

    weighted_total = 0
    for state_pos in range(len(my_stationary_distribution)):
        row_probs = slice_matrix.iloc[state_pos]
        # For p == 0, p * log2(p) is 0 * -inf = NaN; Series.sum() skips NaN by
        # default, so impossible transitions contribute nothing.
        row_sum = (row_probs * log2(row_probs)).sum()
        state_weight = my_stationary_distribution.iloc[state_pos]
        weighted_total = weighted_total + (-(state_weight * row_sum))
    return weighted_total



In [None]:
def get_entropy(my_activity, baseline_days=4*4*7):
    """Per-``day_date`` entropy of one activity table against a baseline period.

    The stationary distribution is estimated once, from the events whose
    ``day_date`` lies within ``baseline_days`` of the first row's ``day_date``.
    ``get_entropy_week`` is then evaluated for every ``day_date`` group using
    that fixed distribution.

    Parameters
    ----------
    my_activity : pandas.DataFrame
        Needs ``day_date`` and ``location_name``; must not be empty.
        The window starts at the ``day_date`` of the first row.
        # presumably rows arrive sorted chronologically - verify against caller
    baseline_days : int, default 112
        Length of the baseline window in days. The default keeps the original
        hard-coded ``4*4*7`` (16 weeks); the old local names called this window
        "6 months", which it is not.

    Returns
    -------
    The result of ``groupby(day_date).apply(...)``: one entry (row) per
    ``day_date``, indexed by ``day_date``.
    """
    baseline_end = my_activity.day_date.iloc[0] + pd.Timedelta(days=baseline_days)
    baseline_activity = my_activity[my_activity['day_date'] < baseline_end]

    baseline_matrix = get_transition_matrix(baseline_activity)
    baseline_distribution = get_stationary_distribution(baseline_matrix)

    my_entropy = my_activity.groupby([my_activity['day_date']]).apply(
        lambda day_events: get_entropy_week(day_events, baseline_distribution)
    )
    return my_entropy

In [None]:
# Clean both sources under new names so the loaded tables stay untouched and
# this cell can be re-run safely (overwriting activity_raw / activity_legacy
# made a second run fail on the columns that had already been dropped).
activity_legacy_clean = pre_process_legacy(activity_legacy)
activity_raw_clean = pre_process_raw(activity_raw)

# One chronological event stream per patient.
activity_all = (
    pd.concat([activity_raw_clean, activity_legacy_clean], axis=0)
    .sort_values(['patient_id', 'start_date'])
    .reset_index(drop=True)
)

# Apply the configured part-of-day and time-step selections.
activity_all = select_daytime_night(activity_all, time)
activity_all = select_time_step(activity_all, timestep)
activity_all.head()

In [None]:
# Table used by the entropy cells below; iloc[:] selects every row of activity_all.
# presumably kept as a slice so a subset can be swapped in while experimenting - TODO confirm
activity=activity_all.iloc[:]

In [None]:
# Daily Markov entropy per patient: get_entropy runs once per patient and
# returns one value per day_date.
entropy_daily = activity.groupby(activity['patient_id']).apply(get_entropy).reset_index()
entropy_daily.columns = ['patient_id', 'start_date', 'mk_entropy_day']
entropy_daily['start_date'] = entropy_daily['start_date'].values.astype('datetime64[D]')

# Tag every day with the start of its week.
entropy_daily['week'] = (
    entropy_daily['start_date'].dt.to_period('W').apply(lambda r: r.start_time)
)

# One row per (patient, day), then keep only weeks with more than 3 observed days.
entropy_daily = entropy_daily.drop_duplicates(subset=['patient_id', 'start_date'], keep='first')
activity_mk_entropy = entropy_daily.groupby(
    [entropy_daily['patient_id'], entropy_daily['week']]
).filter(lambda x: len(x) > 3)
activity_mk_entropy.head()

In [None]:
# Weekly mean of the daily entropy, per patient.
# Only 'mk_entropy_day' is averaged. Averaging every column relied on pandas
# silently dropping the datetime 'start_date' column; when pandas averages it
# too, the result has four columns and a positional three-name rename fails.
entropy_weekly_mean = (
    activity_mk_entropy
    .groupby(['patient_id', 'week'])['mk_entropy_day']
    .mean()
    .reset_index()
    .rename(columns={'week': 'start_date', 'mk_entropy_day': 'entropy_week'})
)

# Keep only patients with more than 8 weeks of data.
activity_mk_entropy_week = entropy_weekly_mean.groupby(['patient_id']).filter(lambda x: len(x) > 8)
activity_mk_entropy_week.head()

In [None]:
# Write the weekly entropy table to a CSV file in the current working directory
# (the DataFrame index is written as the first, unnamed column).
# NOTE(review): the file name is hard-coded to "daytime" although the config flag
# `time` may select night (time = 1) - confirm the name before running a night analysis.
activity_mk_entropy_week.to_csv('c_activity_daytime_per_week_entropy_mk.csv')