In [1]:
# Imports for the whole notebook, followed by session-wide setup.
import numpy as np
import pandas as pd
import dcarte
import os
import math as m
import seaborn as sns
import warnings
# import qutip as qt
from numpy import log2
from scipy import linalg as la
# Suppresses every warning category for the rest of the session, so
# deprecation and copy warnings raised by later cells are not shown.
warnings.filterwarnings('ignore')
# from dcarte_transform.label.uti import label, get_labels
import datetime
# Last expression of the cell: its result is rendered as the cell output
# (the table of domains and dataset names shown below).
dcarte.domains()

Unnamed: 0,RAW,LEGACY,BASE,PROFILE,LOOKUP,BED_HABITS
0,Activity,Device_Type,Bed_Occupancy,Activity_Dailies,Device_Types,Bed_Occupancy
1,Appliances,Doors,Doors,Activity_Weeklies,Homes,
2,Behavioural,Entryway,Entryway,Light,Patients,
3,Blood_Pressure,Flags,Habitat,Physiology_Dailies,,
4,Door,Light,Kitchen,Physiology_Weeklies,,
5,Encounter,Motion,Motion,Sleep_Dailies,,
6,Environmental,Observation,Physiology,Sleep_Weeklies,,
7,Issue,Physiology,Sleep,Temperature,,
8,Observation_Notes,Temperature,Transitions,,,
9,Procedure,Wellbeing,,,,


In [2]:
# Analysis switches; both are passed to the selection helpers in the pipeline cell.
# `time` is the second argument of select_daytime_night.
time = 1  # daytime:0 (6:00-18:00), night:1 (18:00-6:00)
# `timestep` is the second argument of select_time_step, which also accepts
# 2 (per hour) and 3 (accurate time) in addition to the two values listed here.
timestep = 0  # per day: 0, per week: 1
# Load the two activity sources that are pre-processed and concatenated later:
# dataset 'Activity' from the 'RAW' domain and dataset 'Motion' from the 'LEGACY' domain.
activity_raw = dcarte.load('Activity','RAW')
activity_legacy = dcarte.load('Motion','LEGACY')

Finished Loading Activity in:                  1.1 seconds   
Finished Loading Motion in:                    1.4 seconds   


In [3]:
def pre_process_raw(activity_raw):
    """Normalise RAW activity records and keep only the analysed rooms.

    Location labels are translated to canonical room names, every row whose
    room is not one of bathroom / bedroom / hallway / kitchen / lounge is
    removed, ``location_name`` and ``patient_id`` are cast to strings and the
    ``home_id``, ``location_id`` and ``source`` columns are dropped.

    Parameters
    ----------
    activity_raw : pandas.DataFrame
        Must contain ``location_name``, ``patient_id``, ``home_id``,
        ``location_id`` and ``source``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the retained rows (original index preserved).

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    # Raw sensor label -> canonical room name.
    mapping = {
        'conservatory': 'conservatory',
        'WC1': 'wc',
        'corridor1': 'corridor',
        'living room': 'living',
        'study': 'study',
        'dining room': 'dining',
        'bathroom1': 'bathroom',
        'bedroom1': 'bedroom',
        'hallway': 'hallway',
        'lounge': 'lounge',
        'kitchen': 'kitchen',
        'cellar': 'cellar',
        'office': 'office',
    }
    # Canonical rooms that are recognised but left out of the analysis.
    excluded = ['cellar', 'office', 'dining', 'study', 'living', 'corridor', 'wc', 'conservatory']
    kept = set(mapping.values()) - set(excluded)

    # Revise location names without touching the caller's frame.
    location = activity_raw['location_name'].map(mapping)

    # Whitelisting the kept rooms also removes labels missing from `mapping`:
    # map() turns those into NaN, which a blacklist lets through and the
    # string cast below would turn into a bogus room literally named 'nan'.
    keep_rows = location.isin(kept)

    # Copy only the surviving rows so the assignments below act on an
    # independent frame.
    activity = activity_raw[keep_rows].copy()
    activity['location_name'] = location[keep_rows].values.astype('str')
    activity['patient_id'] = activity['patient_id'].values.astype('str')

    # Delete redundant columns.
    return activity.drop(['home_id', 'location_id', 'source'], axis=1)

In [4]:
def pre_process_legacy(activity_raw):
    """Normalise LEGACY motion records and keep only the analysed rooms.

    Location labels are translated to canonical room names, every row whose
    room is not one of bathroom / bedroom / hallway / kitchen / lounge is
    removed, ``location_name`` and ``patient_id`` are cast to strings and the
    result is reduced to ``start_date``, ``patient_id``, ``location_name``
    (in that order).

    Parameters
    ----------
    activity_raw : pandas.DataFrame
        Must contain ``start_date``, ``patient_id`` and ``location_name``;
        any other column (e.g. ``index``, ``timezone``) is discarded.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new three-column frame holding the retained rows (original index
        preserved).

    Raises
    ------
    KeyError
        If any of the three required columns is missing.
    """
    # Legacy sensor label -> canonical room name.
    mapping = {
        'Hallway': 'hallway',
        'Kitchen': 'kitchen',
        'Study': 'study',
        'Bathroom': 'bathroom',
        'Lounge': 'lounge',
        'Bedroom': 'bedroom',
        'Living Room': 'living',
        'Front Door': 'door',
        'D': 'd',
        'Dining Room': 'dining',
    }
    # Canonical names that are recognised but left out of the analysis.
    excluded = ['study', 'living', 'door', 'd', 'dining']
    kept = set(mapping.values()) - set(excluded)

    # Revise location names without touching the caller's frame.
    location = activity_raw['location_name'].map(mapping)

    # Whitelisting the kept rooms also removes labels missing from `mapping`:
    # map() turns those into NaN, which a blacklist lets through and the
    # string cast below would turn into a bogus room literally named 'nan'.
    keep_rows = location.isin(kept)

    # Selecting the three output columns already discards every redundant
    # column, so no separate drop (which fails when a column is absent) is needed.
    activity = activity_raw.loc[keep_rows, ['start_date', 'patient_id', 'location_name']].copy()
    activity['location_name'] = location[keep_rows].values.astype('str')
    activity['patient_id'] = activity['patient_id'].values.astype('str')

    return activity

In [5]:
def select_daytime_night(my_activity,my_time):
    """Keep either the daytime or the night-time events and label each with a date.

    Daytime events are those whose ``start_date`` hour is 6..17 inclusive
    (06:00:00-17:59:59); every other event counts as night.

    Parameters
    ----------
    my_activity : pandas.DataFrame
        Must contain a datetime ``start_date`` column. It is not modified.
    my_time : int
        0 selects daytime, 1 selects night.

    Returns
    -------
    pandas.DataFrame
        A new frame with the selected rows (original index preserved) and an
        added ``day_date`` column truncated to whole days. For night events
        before 06:00 ``day_date`` is the previous calendar day, so one night
        (18:00-06:00) shares a single date.

    Raises
    ------
    ValueError
        If ``my_time`` is neither 0 nor 1.
    """
    if my_time not in (0, 1):
        raise ValueError("Error: please input correct number! daytime:0 (6:00-18:00), night:1 (18:00-6:00)")

    # The hour is derived on the fly instead of being stored as a helper
    # column on `my_activity`, so the caller's frame is left untouched.
    in_daytime = my_activity['start_date'].dt.hour.between(6, 17)

    if my_time == 0:
        print("Time: daytime")
        # choose daytime, between [6:00-18:00)
        activity_select = my_activity[in_daytime].copy()
        activity_select['day_date'] = activity_select['start_date'].values.astype("datetime64[D]")
    else:
        print("Time: night")
        # choose night time, i.e. everything outside [6:00-18:00). E.g. the
        # night of 22/3 covers 18:00-24:00 on 22/3 and 00:00-06:00 on 23/3.
        activity_select = my_activity[~in_daytime].copy()
        starts = activity_select['start_date']
        before_dawn = starts.dt.hour < 6
        # Early-morning events belong to the night that started the day before.
        night_anchor = starts.mask(before_dawn, starts + pd.Timedelta(days=-1))
        activity_select['day_date'] = night_anchor.values.astype("datetime64[D]")

    return activity_select

In [6]:
def select_time_step(my_activity, my_timestep):
    """Prepare the activity events for the requested temporal resolution.

    Parameters
    ----------
    my_activity : pandas.DataFrame or DataFrame-convertible
        Events with ``patient_id`` and ``location_name`` plus ``day_date``
        (modes 0 and 1) and ``start_date`` (modes 1, 2 and 3).
        It is not modified.
    my_timestep : int
        * 0 - per day: truncate ``day_date`` to whole days and keep only
          (patient, day) groups with more than 2 events.
        * 1 - per week: replace ``start_date`` by the start of the week that
          contains ``day_date``, drop ``day_date`` and keep only
          (patient, week) groups with more than 2 events.
        * 2 - per hour: truncate ``start_date`` to whole hours.
        * 3 - accurate time: cast ``start_date`` to nanosecond resolution.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is left untouched.

    Raises
    ------
    ValueError
        If ``my_timestep`` is not one of 0, 1, 2, 3.
    """
    if my_timestep not in (0, 1, 2, 3):
        raise ValueError("Error: please input correct number! per day: 0, per week: 1, per hour: 2, accurate time: 3")

    # pd.DataFrame(frame) alone may share its data with `frame`, so in-place
    # edits could leak back to the caller; work on an explicit copy instead.
    activity = pd.DataFrame(my_activity).copy()

    if my_timestep == 0:
        print("Timestep: per day")
        activity['day_date'] = activity['day_date'].values.astype("datetime64[D]")
        activity = activity.groupby(['patient_id', 'day_date']).filter(lambda x: len(x) > 2)

    elif my_timestep == 1:
        print("Timestep: per week")
        week_start = activity['day_date'].dt.to_period('W').dt.start_time
        # Replace the columns by name rather than by position so the result
        # is correctly labelled whatever the incoming column order is; the
        # week start is appended last, as `start_date`.
        activity = activity.drop(columns=['day_date', 'start_date'])
        activity['start_date'] = week_start
        activity = activity.groupby(['patient_id', 'start_date']).filter(lambda x: len(x) > 2)

    elif my_timestep == 2:
        print("Timestep: per hour")
        activity['start_date'] = activity['start_date'].values.astype("datetime64[h]")

    else:
        print("Accurate time")
        activity['start_date'] = activity['start_date'].values.astype("datetime64[ns]")

    return activity

In [7]:
def get_von_nuemann_entropy(my_data, order=6):
    """Approximate the von Neumann entropy of the room-count correlation structure.

    The Pearson correlation matrix of the five room columns is divided by its
    size to give a unit-trace "density matrix" ``rho``. The entropy
    ``S = -Tr(rho ln rho)`` is then evaluated with ``ln rho`` replaced by the
    truncated series ``sum_{k=1..order} (-1)**(k+1) * (rho - I)**k / k``.

    Because the series is truncated the result is an approximation: with the
    default ``order=6`` five uncorrelated rooms give about 1.502 rather than
    ``ln 5`` (about 1.609). Larger ``order`` values approach the exact value.

    Parameters
    ----------
    my_data : pandas.DataFrame or DataFrame-convertible
        Observations in rows with the columns ``bathroom``, ``bedroom``,
        ``hallway``, ``kitchen`` and ``lounge``; other columns are ignored.
    order : int, optional
        Number of series terms to use. The default of 6 reproduces the
        original hard-coded expansion.

    Returns
    -------
    numpy.float64
        The approximated entropy. NaN when the correlation matrix is undefined
        (for example a room with constant counts, or fewer than two rows).

    Raises
    ------
    ValueError
        If ``order`` is smaller than 1.
    """
    if order < 1:
        raise ValueError("order must be a positive integer")

    # get density matrix
    room_counts = np.array(
        pd.DataFrame(my_data, columns=['bathroom', 'bedroom', 'hallway', 'kitchen', 'lounge'])
    )
    pearson = np.corrcoef(room_counts, rowvar=False)
    # A correlation matrix has ones on its diagonal, so dividing by its size
    # normalises the trace to 1.
    density = pearson / len(pearson)

    # get von Neumann entropy
    # Plain ndarrays with explicit matrix products are used instead of
    # np.matrix, which NumPy no longer recommends.
    deviation = density - np.eye(len(density))
    log_density = np.zeros_like(density)
    for k in range(1, order + 1):
        term = np.linalg.matrix_power(deviation, k) / k
        # Alternating signs: odd powers are added, even powers subtracted.
        log_density = log_density + term if k % 2 else log_density - term

    return -np.trace(density @ log_density)

In [8]:
# Clean each source with its own pre-processor (legacy first, then raw).
activity_legacy = pre_process_legacy(activity_legacy)
activity_raw = pre_process_raw(activity_raw)

# Stack both sources, order the events per patient and renumber the rows.
activity = (
    pd.concat([activity_raw, activity_legacy], axis=0)
    .sort_values(['patient_id', 'start_date'])
    .reset_index(drop=True)
)

# Apply the part-of-day selection and then the time-step handling,
# both driven by the `time` / `timestep` switches set earlier.
activity = select_time_step(select_daytime_night(activity, time), timestep)
activity.day_date = activity.day_date.values.astype('datetime64[D]')
activity

Time: night
Timestep: per day


Unnamed: 0,start_date,patient_id,location_name,day_date
128,2019-04-18 18:34:59,154dFNbk7ymdc6466EGbwT,lounge,2019-04-18
129,2019-04-18 18:35:07,154dFNbk7ymdc6466EGbwT,kitchen,2019-04-18
130,2019-04-18 18:36:00,154dFNbk7ymdc6466EGbwT,lounge,2019-04-18
131,2019-04-18 18:55:48,154dFNbk7ymdc6466EGbwT,lounge,2019-04-18
132,2019-04-18 19:08:12,154dFNbk7ymdc6466EGbwT,lounge,2019-04-18
...,...,...,...,...
23176258,2019-08-14 04:31:05,yK2pxi2t19LksYCxU1sPe,kitchen,2019-08-13
23176259,2019-08-14 04:32:50,yK2pxi2t19LksYCxU1sPe,hallway,2019-08-13
23176260,2019-08-14 04:37:56,yK2pxi2t19LksYCxU1sPe,bathroom,2019-08-13
23176261,2019-08-14 04:39:46,yK2pxi2t19LksYCxU1sPe,bathroom,2019-08-13


In [9]:
# Count events per room for every (patient, day) and spread the rooms into columns.
activity_count = (
    activity
    .groupby(['patient_id', 'day_date'])['location_name']
    .value_counts()
    .unstack()
    .reset_index()
)
activity_count['day_date'] = activity_count['day_date'].values.astype("datetime64[D]")

# Tag each day with the start of the week it falls in.
activity_count['week'] = activity_count['day_date'].dt.to_period('W').apply(lambda r: r.start_time)

# Keep only (patient, week) groups with more than 3 recorded days; rooms with
# no events on a day are NaN after unstack and become 0.
activity_count = (
    activity_count
    .groupby(['patient_id', 'week'])
    .filter(lambda days: len(days) > 3)
    .fillna(0)
)
activity_count

location_name,patient_id,day_date,bathroom,bedroom,hallway,kitchen,lounge,week
0,154dFNbk7ymdc6466EGbwT,2019-04-18,18.0,19.0,7.0,5.0,12.0,2019-04-15
1,154dFNbk7ymdc6466EGbwT,2019-04-19,19.0,19.0,7.0,11.0,16.0,2019-04-15
2,154dFNbk7ymdc6466EGbwT,2019-04-20,18.0,22.0,8.0,9.0,8.0,2019-04-15
3,154dFNbk7ymdc6466EGbwT,2019-04-21,19.0,18.0,7.0,5.0,12.0,2019-04-15
4,154dFNbk7ymdc6466EGbwT,2019-04-22,20.0,18.0,5.0,4.0,9.0,2019-04-22
...,...,...,...,...,...,...,...,...
56615,yK2pxi2t19LksYCxU1sPe,2019-08-07,4.0,38.0,29.0,57.0,26.0,2019-08-05
56616,yK2pxi2t19LksYCxU1sPe,2019-08-08,1.0,25.0,47.0,66.0,30.0,2019-08-05
56617,yK2pxi2t19LksYCxU1sPe,2019-08-09,6.0,28.0,47.0,102.0,30.0,2019-08-05
56618,yK2pxi2t19LksYCxU1sPe,2019-08-10,10.0,17.0,47.0,58.0,41.0,2019-08-05


In [10]:
# Full-range slice of the daily counts.
activity_count_s = activity_count.iloc[:]

# One entropy value per (patient, week).
activity_vn = (
    activity_count_s
    .groupby(['patient_id', 'week'])
    .apply(get_von_nuemann_entropy)
    .reset_index()
)
activity_vn.columns = ['patient_id', 'week', 'activity_vn']

# Keep only patients with more than 8 weekly values.
activity_vn = activity_vn.groupby(['patient_id']).filter(lambda weeks: len(weeks) > 8)
activity_vn

Unnamed: 0,patient_id,week,activity_vn
0,154dFNbk7ymdc6466EGbwT,2019-04-15,0.775215
1,154dFNbk7ymdc6466EGbwT,2019-04-22,1.156608
2,154dFNbk7ymdc6466EGbwT,2019-04-29,1.071865
3,154dFNbk7ymdc6466EGbwT,2019-05-06,1.061413
4,154dFNbk7ymdc6466EGbwT,2019-05-13,0.595758
...,...,...,...
8237,yK2pxi2t19LksYCxU1sPe,2019-07-08,0.949237
8238,yK2pxi2t19LksYCxU1sPe,2019-07-15,0.912793
8239,yK2pxi2t19LksYCxU1sPe,2019-07-22,1.003562
8240,yK2pxi2t19LksYCxU1sPe,2019-07-29,1.001145


In [11]:
# Save the complete weekly entropy table.
# NOTE(review): the file and folder names hard-code "night_per_week" whatever
# the `time` switch is set to - confirm they match the run being saved.
activity_vn.to_csv('c_activity_night_per_week_entropy_vn_frequency.csv')

# Save one CSV per patient under FIGURE_DIR.
FIGURE_DIR = os.path.join("raw_data","activity_night_per_week_entropy_vn_frequency")
# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing (no error if it is already there).
os.makedirs(FIGURE_DIR, exist_ok=True)
# Iterate over the groups explicitly: each group keeps all its columns
# (including patient_id) and each file is written exactly once.
for patient_id, patient_rows in activity_vn.groupby('patient_id'):
    patient_rows.to_csv(os.path.join(FIGURE_DIR, f'{patient_id}.csv'))