In [None]:
import numpy as np
import pandas as pd
import dcarte
import os
import math
import seaborn as sns
import warnings
from numpy import log2
# NOTE(review): this suppresses every Python warning for the rest of the session,
# including the pandas warnings that would flag questionable assignments below.
warnings.filterwarnings('ignore')
# Bare call whose result is not stored; presumably lists the available dcarte
# domains as cell output -- TODO confirm against the dcarte documentation.
dcarte.domains()

In [None]:
time = 1  # passed to select_daytime_night -- daytime: 0 (6:00-18:00), night: 1 (18:00-6:00)
timestep = 3  # passed to select_time_step -- per day: 0, per week: 1, per hour: 2, accurate time: 3
# Load the three dcarte datasets used by this notebook.
activity_raw = dcarte.load('Activity','RAW')
activity_legacy = dcarte.load('Motion','LEGACY')
sleep_raw = dcarte.load('Sleep_Dailies','PROFILE')
# Cache the sleep profile on disk; pre_processing_in_bed / pre_processing_out_bed read 'middle.csv' back.
# NOTE(review): 'middle.csv' is a generic scratch name -- make sure nothing overwrites it
# between this cell and the sleep pre-processing cells.
sleep_raw.to_csv('middle.csv')

In [None]:
def pre_process_legacy(activity_raw):
    """Normalise the legacy motion table and keep only the rooms of interest.

    Args:
        activity_raw: DataFrame with at least the columns ``start_date``,
            ``patient_id`` and ``location_name``. Any other column is ignored,
            so the frame does not need to carry ``index``/``timezone``.

    Returns:
        A new DataFrame with exactly the columns ``start_date``, ``patient_id``
        and ``location_name`` (the last two cast to ``str``). Row labels of the
        surviving rows are kept; the input frame is not modified.
    """
    # Legacy sensor label -> harmonised lowercase label.
    mapping = {
        'Hallway': 'hallway',
        'Kitchen': 'kitchen',
        'Study': 'study',
        'Bathroom': 'bathroom',
        'Lounge': 'lounge',
        'Bedroom': 'bedroom',
        'Living Room': 'living',
        'Front Door': 'door',
        'D': 'd',
        'Dining Room': 'dining',
    }
    excluded_locations = ['study', 'living', 'door', 'd', 'dining']

    # Select the needed columns up front: this makes a separate "drop redundant
    # columns" step unnecessary and avoids a KeyError when they are absent.
    activity = activity_raw[['start_date', 'patient_id', 'location_name']].copy()
    activity['location_name'] = activity['location_name'].map(mapping)

    # Explicit copy so the assignments below write to an independent frame.
    activity = activity[~activity['location_name'].isin(excluded_locations)].copy()

    # NOTE(review): labels missing from `mapping` become NaN in `map` and are then
    # turned into the literal string 'nan' by the cast below; they are not removed.
    activity['location_name'] = activity['location_name'].values.astype('str')
    activity['patient_id'] = activity['patient_id'].values.astype('str')

    return activity

In [None]:
def pre_process_raw(activity_raw):
    """Harmonise location labels of the raw activity table and trim its columns.

    Works on a copy of ``activity_raw``: location labels are translated through
    a fixed lookup, rows whose translated label is in the exclusion list are
    removed, ``location_name`` and ``patient_id`` are cast to ``str`` and the
    ``home_id``, ``location_id`` and ``source`` columns are dropped.

    NOTE(review): labels absent from the lookup become NaN in ``map`` and end up
    as the string 'nan' after the cast; they are not filtered out.
    """
    # Raw sensor label -> harmonised label.
    label_lookup = {
        'conservatory': 'conservatory',
        'WC1': 'wc',
        'corridor1': 'corridor',
        'living room': 'living',
        'study': 'study',
        'dining room': 'dining',
        'bathroom1': 'bathroom',
        'bedroom1': 'bedroom',
        'hallway': 'hallway',
        'lounge': 'lounge',
        'kitchen': 'kitchen',
        'cellar': 'cellar',
        'office': 'office',
    }
    unwanted = ['cellar', 'office', 'dining', 'study', 'living', 'corridor', 'wc', 'conservatory']

    frame = activity_raw.copy()
    frame['location_name'] = frame['location_name'].map(label_lookup)

    keep_row = ~frame['location_name'].isin(unwanted)
    frame = frame[keep_row]

    frame['location_name'] = frame['location_name'].values.astype('str')
    frame['patient_id'] = frame['patient_id'].values.astype('str')

    # Remove the redundant identifier columns.
    return frame.drop(columns=['home_id', 'location_id', 'source'])

In [None]:
def pre_processing_in_bed(path='middle.csv'):
    """Load the 'Time in bed' series and keep complete patient-weeks only.

    Args:
        path: CSV file holding at least the columns ``patient_id``,
            ``start_date`` and ``Time in bed``. Defaults to ``'middle.csv'``.

    Returns:
        DataFrame with the columns ``patient_id``, ``start_date`` (truncated to
        the day), ``time_in_bed`` (file value cast to float and multiplied by
        60), ``week`` (start of the pandas ``'W'`` period containing
        ``start_date``) and ``day_rank`` (dense rank of ``start_date`` within
        its week, computed over all patients). Only (patient, week) groups with
        exactly seven rows are returned.
    """
    sleep_time_in = pd.read_csv(path, usecols=['patient_id', 'start_date', 'Time in bed'])

    # `usecols` keeps the file's column order, so rename by label and then fix
    # the order explicitly instead of assigning names by position.
    sleep_time_in = sleep_time_in.rename(columns={'Time in bed': 'time_in_bed'})
    sleep_time_in = sleep_time_in[['patient_id', 'start_date', 'time_in_bed']]

    # presumably hours -> minutes (the original inline note says "minute") -- TODO confirm unit
    sleep_time_in['time_in_bed'] = sleep_time_in['time_in_bed'].values.astype('float') * 60

    parsed_dates = pd.to_datetime(sleep_time_in['start_date'])
    sleep_time_in['start_date'] = parsed_dates.values.astype('datetime64[D]')
    sleep_time_in['week'] = parsed_dates.dt.to_period('W').dt.start_time

    # Encode the days of each week as 1, 2, ... in date order.
    day_rank = sleep_time_in.groupby(['week'])['start_date'].rank(method='dense')
    sleep_time_in['day_rank'] = day_rank.values.astype('int')

    # Keep only weeks that have all seven daily records for a patient.
    return sleep_time_in.groupby(['patient_id', 'week']).filter(lambda x: len(x) == 7)

In [None]:
def pre_processing_out_bed(path='middle.csv'):
    """Load the 'Time out of bed' series and keep complete patient-weeks only.

    Args:
        path: CSV file holding at least the columns ``patient_id``,
            ``start_date`` and ``Time out of bed``. Defaults to ``'middle.csv'``.

    Returns:
        DataFrame with the columns ``patient_id``, ``start_date`` (truncated to
        the day), ``time_out_bed`` (file value cast to float and multiplied by
        60), ``week`` (start of the pandas ``'W'`` period containing
        ``start_date``) and ``day_rank`` (dense rank of ``start_date`` within
        its week, computed over all patients). Only (patient, week) groups with
        exactly seven rows are returned.
    """
    sleep_time_out = pd.read_csv(path, usecols=['patient_id', 'start_date', 'Time out of bed'])

    # `usecols` keeps the file's column order, so rename by label and then fix
    # the order explicitly instead of assigning names by position.
    sleep_time_out = sleep_time_out.rename(columns={'Time out of bed': 'time_out_bed'})
    sleep_time_out = sleep_time_out[['patient_id', 'start_date', 'time_out_bed']]

    # Write through the column label: assigning to an attribute name that is not
    # an existing column only sets a Python attribute and leaves the data as is.
    # presumably hours -> minutes (the original inline note says "minute") -- TODO confirm unit
    sleep_time_out['time_out_bed'] = sleep_time_out['time_out_bed'].values.astype('float') * 60

    parsed_dates = pd.to_datetime(sleep_time_out['start_date'])
    sleep_time_out['start_date'] = parsed_dates.values.astype('datetime64[D]')
    sleep_time_out['week'] = parsed_dates.dt.to_period('W').dt.start_time

    # Encode the days of each week as 1, 2, ... in date order.
    day_rank = sleep_time_out.groupby(['week'])['start_date'].rank(method='dense')
    sleep_time_out['day_rank'] = day_rank.values.astype('int')

    # Keep only weeks that have all seven daily records for a patient.
    return sleep_time_out.groupby(['patient_id', 'week']).filter(lambda x: len(x) == 7)

In [None]:
def select_daytime_night(my_activity, my_time):
    """Keep daytime or night-time events and tag each with the day it belongs to.

    Args:
        my_activity: DataFrame with a datetime ``start_date`` column. It is not
            modified.
        my_time: 0 keeps events whose hour is in 6..17 (daytime);
            1 keeps all other events (night).

    Returns:
        A new DataFrame holding the selected rows plus a ``day_date`` column.
        For daytime this is the calendar date of ``start_date``. For night,
        events before 06:00 are assigned to the previous calendar date, so the
        night of 22/3 covers 18:00-24:00 on 22/3 and 00:00-06:00 on 23/3.

    Raises:
        ValueError: if ``my_time`` is neither 0 nor 1.
    """
    if my_time == 0:
        print("Time: daytime")
        keep_daytime = True
    elif my_time == 1:
        print("Time: night")
        keep_daytime = False
    else:
        raise ValueError("Error: please input correct number! daytime:0 (6:00-18:00), night:1 (18:00-6:00)")

    # Work with a standalone hour Series so no helper column is written to the
    # caller's frame.
    in_daytime = my_activity['start_date'].dt.hour.between(6, 17)

    if keep_daytime:
        activity_select = my_activity[in_daytime].copy()
        activity_select['day_date'] = activity_select['start_date'].values.astype("datetime64[D]")
        return activity_select

    activity_select = my_activity[~in_daytime].copy()
    before_six = (activity_select['start_date'].dt.hour < 6).values
    previous_day = activity_select['start_date'] - pd.Timedelta(days=1)
    # Early-morning events count towards the night that started the day before.
    night_anchor = activity_select['start_date'].mask(before_six, previous_day)
    activity_select['day_date'] = night_anchor.values.astype("datetime64[D]")
    return activity_select

In [None]:
def select_time_step(my_activity, my_timestep):
    """Re-bucket the event timestamps to the requested time step.

    Args:
        my_activity: event table with ``patient_id``, ``location_name``,
            ``start_date`` and ``day_date`` columns. It is not modified.
        my_timestep:
            0 -- per day: ``day_date`` is truncated to the day and
                 (patient, day) groups with two rows or fewer are removed;
            1 -- per week: the result has the columns ``patient_id``,
                 ``location_name`` and ``start_date``, where ``start_date`` is
                 the start of the pandas ``'W'`` period of ``day_date``;
                 (patient, week) groups with two rows or fewer are removed;
            2 -- per hour: ``start_date`` is truncated to the hour;
            3 -- accurate time: ``start_date`` is cast to nanosecond precision.

    Returns:
        A new DataFrame.

    Raises:
        ValueError: if ``my_timestep`` is not one of 0, 1, 2, 3.
    """
    # Explicit copy: wrapping a DataFrame in pd.DataFrame() may share data with
    # the caller's frame, so the column writes below could leak back to it.
    activity = pd.DataFrame(my_activity).copy()

    if my_timestep == 0:
        print("Timestep: per day")
        activity['day_date'] = activity['day_date'].values.astype("datetime64[D]")
        activity = activity.groupby(['patient_id', 'day_date']).filter(lambda x: len(x) > 2)

    elif my_timestep == 1:
        print("Timestep: per week")
        activity['week'] = activity['day_date'].dt.to_period('W').dt.start_time
        # Rename and order by label so the result does not depend on the column
        # order of the input.
        activity = (
            activity.drop(columns=['day_date', 'start_date'])
            .rename(columns={'week': 'start_date'})
            [['patient_id', 'location_name', 'start_date']]
        )
        activity = activity.groupby(['patient_id', 'start_date']).filter(lambda x: len(x) > 2)

    elif my_timestep == 2:
        print("Timestep: per hour")
        activity['start_date'] = activity['start_date'].values.astype("datetime64[h]")

    elif my_timestep == 3:
        print("Accurate time")
        activity['start_date'] = activity['start_date'].values.astype("datetime64[ns]")

    else:
        raise ValueError(
            "Error: please input correct number! "
            "per day: 0, per week: 1, per hour: 2, accurate time: 3"
        )

    return activity

In [None]:
def get_von_nuemann_entropy(my_data):
    """Approximate von Neumann entropy of the row-correlation matrix of a table.

    The first two columns of ``my_data`` are skipped; the remaining columns are
    NaN-filled with 0 and each row is treated as one variable. The Pearson
    correlation matrix between rows, divided by its size, is used as density
    matrix ``B``. ``log(B)`` is replaced by the first six terms of the series
    ``(B-I) - (B-I)^2/2 + ... - (B-I)^6/6`` (matrix powers), and the function
    returns ``-trace(B @ series)`` as a scalar.

    NOTE(review): the two skipped columns are presumably the grouping keys
    (patient and week) -- verify against what the caller passes in.
    """
    # Density matrix: row-wise Pearson correlations scaled by the matrix size.
    feature_table = pd.DataFrame(my_data, columns=my_data.columns[2:]).fillna(0)
    samples = np.array(feature_table)
    correlation = np.atleast_2d(np.corrcoef(samples, rowvar=True))
    density = correlation / len(correlation)

    # Truncated power series standing in for the matrix logarithm of `density`.
    offset = density - np.eye(len(density))
    log_approx = offset.copy()
    for order in range(2, 7):
        term = np.linalg.matrix_power(offset, order) / order
        if order % 2 == 0:
            log_approx = log_approx - term
        else:
            log_approx = log_approx + term

    return -np.trace(np.dot(density, log_approx))

In [None]:
# Harmonise both sensor feeds.
# NOTE(review): both names are rebound to their processed versions, so running
# this cell a second time feeds already-processed frames back into the helpers.
activity_legacy = pre_process_legacy(activity_legacy)
activity_raw = pre_process_raw(activity_raw)

# Stack the two feeds and order the events per patient in time.
activity = (
    pd.concat([activity_raw, activity_legacy], axis=0)
    .sort_values(['patient_id','start_date'])
    .reset_index(drop=True)
)

# Restrict to the configured part of the day, then apply the configured time step.
activity = select_time_step(select_daytime_night(activity, time), timestep)

# NOTE(review): select_time_step removes day_date when timestep == 1, so this
# line only works for the other settings.
activity['day_date'] = activity.day_date.values.astype('datetime64[D]')
activity.head()

In [None]:
# Take a full-length slice of the processed events and preview its first rows.
# NOTE(review): the same assignment is repeated in a later cell before activity_t is modified.
activity_t = activity.iloc[:]
activity_t.head()

In [None]:
# Load both sleep series and rename their columns by position
# (the second column becomes 'day_date').
sleep_time_in = pre_processing_in_bed().set_axis(
    ['patient_id','day_date','time_in_bed','week','day_rank'], axis=1)
sleep_time_out = pre_processing_out_bed().set_axis(
    ['patient_id','day_date','time_out_bed','week','day_rank'], axis=1)

# NOTE(review): pre_processing_in_bed already contains a "* 60" step of its own;
# confirm the intended unit of both columns after this extra scaling.
sleep_time_in['time_in_bed'] = sleep_time_in['time_in_bed'] * 60 * 60
sleep_time_out['time_out_bed'] = sleep_time_out['time_out_bed'] * 60 * 60

# One row per patient and day with both durations (inner join); the week and
# day_rank columns of either side are discarded by the column selection.
sleep_all_col = ['patient_id','day_date','time_in_bed','time_out_bed']
sleep_all = (
    sleep_time_in.merge(sleep_time_out, on=['day_date','patient_id'])
    .reindex(columns=sleep_all_col)
)
sleep_all

In [None]:
# Start again from a full-length slice of `activity`; the next cell adds derived columns to activity_t.
activity_t = activity.iloc[:]
# Bare expression: shows the frame as the cell output.
activity_t

In [None]:
# Week bucket of every event: start of the pandas 'W' period containing day_date.
activity_t['week'] = activity_t['day_date'].dt.to_period('W').apply(lambda r: r.start_time)

activity_t = activity_t.sort_values(['patient_id','start_date'])
activity_t = activity_t.reset_index(drop=True)
activity_t['week_rank'] = activity_t.groupby('patient_id')['week'].rank(method='dense')

# Transition label "<current room>_<next room>".
# The next room is looked up within each patient: a plain shift over the whole
# frame would pair a patient's last event with the first event of the next
# patient. The last event of every patient therefore gets a missing path.
activity_t['location_name_next'] = activity_t.groupby('patient_id')['location_name'].shift(-1)
activity_t['path'] = activity_t.location_name +'_'+activity_t.location_name_next

# NOTE(review): this replaces the day_date computed earlier with the calendar
# date of start_date, while 'week' above was derived from the earlier value --
# confirm this is intended for the night-time setting.
activity_t['day_date']= activity_t.start_date.values.astype('datetime64[D]')

# Expose the positional row number as an 'index' column (used later as join key).
# The row labels are already 0..n-1 from the reset above, so one reset suffices.
activity_t = activity_t.reset_index()
activity_t

In [None]:
# Within every (patient_id, day_date) group, take the gap between each event and
# the previous one (Series.diff with periods=1) and keep its `.dt.seconds` part;
# the first event of a group has no predecessor and yields NaN.
# reset_index() then moves the group keys and the original row labels into columns.
# NOTE(review): the gap on a row is measured back to the previous event, whereas
# `path` on the same row describes the move to the next event -- confirm that
# this pairing of duration and path is intended.
activity_stay_time = activity_t.groupby([activity_t['patient_id'],activity_t['day_date']]).apply(lambda x: x.start_date.diff(periods=1).dt.seconds).reset_index()
activity_stay_time.head()

In [None]:
# Positional rename of the four columns produced by reset_index() in the previous
# cell; 'index' carries the original row label and is the key used to join back
# onto activity_t.
activity_stay_time.columns=['patient_id','day_date','index','stay_time']
# Store the durations as float.
activity_stay_time.stay_time = activity_stay_time.stay_time.values.astype('float')
activity_stay_time.head()

In [None]:
# Attach the path label of every event to its stay time via the shared 'index'
# key. Both inputs carry patient_id and day_date, so the merge suffixes them;
# keep one copy of each and restore the plain names.
activity_n = (
    activity_stay_time.merge(activity_t, on='index')
    .reindex(columns=['patient_id_x','day_date_y','stay_time','path'])
    .set_axis(['patient_id','day_date','stay_time','path'], axis=1)
)
activity_n

In [None]:
# Total stay time per patient, day and path, pivoted so that every path becomes
# its own column (one row per patient and day).
activity_day = (
    activity_n.groupby(['patient_id','day_date','path'])['stay_time']
    .sum()
    .unstack()
    .sort_values(['patient_id', 'day_date'])
    .reset_index()
)

# Week bucket of every day: start of the pandas 'W' period containing day_date.
activity_day['day_date'] = activity_day['day_date'].values.astype('datetime64[D]')
activity_day['week'] = activity_day['day_date'].dt.to_period('W').apply(lambda r: r.start_time)

# Keep only patient-weeks that contain more than two days.
activity_day = activity_day.groupby(['patient_id','week']).filter(lambda days: len(days) > 2)

# Number the remaining weeks per patient and treat unseen paths as zero stay time.
activity_day['week_rank'] = activity_day.groupby('patient_id')['week'].rank(method='dense')
activity_day = activity_day.fillna(0)
activity_day.head()

In [None]:
# Join the daily path durations with the sleep durations (inner join on patient
# and day), then replace the day column by the week it belongs to: the second
# column ends up named 'week' and the helper columns are removed.
activity_all = (
    activity_day.merge(sleep_all, on=['day_date','patient_id'])
    .assign(day_date=lambda merged: merged['week'])
    .drop(columns=['week','week_rank'])
    .rename(columns={'day_date':'week'})
)
activity_all.head()

In [None]:
# Keep only patient-weeks that still have more than three daily rows.
activity_all = activity_all.groupby(['patient_id','week']).filter(
    lambda week_rows: week_rows.shape[0] > 3
)
activity_all.head()

In [None]:
# Work on a full-length slice of the merged table.
activity_all_s = activity_all.iloc[:]
# One entropy value per (patient_id, week): get_von_nuemann_entropy skips the
# first two columns of the frame it receives and uses the rest as features.
# NOTE(review): newer pandas releases can exclude the grouping columns from the
# frame passed to apply; in that case the first two feature columns would be
# skipped instead -- verify against the installed pandas version.
activity_vn = activity_all_s.groupby(['patient_id','week']).apply(get_von_nuemann_entropy)
activity_vn

In [None]:
# Turn the (patient_id, week) index into ordinary columns in memory.
# Writing to 'middle.csv' here would overwrite the sleep-profile cache that the
# sleep pre-processing helpers read, breaking any re-run of those cells.
activity_vn = activity_vn.reset_index()
activity_vn.columns = ['patient_id','week','activity_vn']
activity_vn.head()

In [None]:
# Keep only patients with more than eight weekly entropy values.
activity_vn = activity_vn.groupby('patient_id').filter(lambda weeks: len(weeks) > 8)

In [None]:
# Export the weekly entropy table.
# NOTE(review): the file name hard-codes 'night_per_week' regardless of the
# `time` / `timestep` settings chosen at the top of the notebook.
activity_vn.to_csv('c_activity_night_per_week_entropy_vn_mk_duration.csv')