In [None]:
import numpy as np
import pandas as pd
import dcarte
import os
import math as m
import seaborn as sns
import warnings
# import qutip as qt
from numpy import log2
from scipy import linalg as la
warnings.filterwarnings('ignore')
# from dcarte_transform.label.uti import label, get_labels
import datetime
dcarte.domains()

In [None]:
# --- Run configuration -------------------------------------------------------
# `time` is passed to select_daytime_night(): 0 keeps daytime events,
# 1 keeps night events (night events before 06:00 are assigned to the
# previous calendar day).
time = 0  # daytime:0 (6:00-18:00), night:1 (18:00-6:00)
# `timestep` is passed to select_time_step(). That function also accepts
# 2 (per hour) and 3 (accurate time), but the cells further down group by a
# `day_date` column, which only the per-day mode (0) keeps.
timestep = 0  # per day: 0, per week: 1
# --- Data load ---------------------------------------------------------------
# Two source tables are loaded through dcarte; they are cleaned separately
# (pre_process_raw / pre_process_legacy) and concatenated later.
activity_raw = dcarte.load('Activity','RAW')
activity_legacy = dcarte.load('Motion','LEGACY')

In [None]:
def pre_process_raw(activity_raw):
    """Clean the RAW activity table.

    Steps:
      1. Translate the sensor location labels to the short room names used by
         the rest of the notebook.
      2. Discard rows whose room is in the exclusion list, and rows whose
         label is not present in the mapping at all.
      3. Cast ``location_name`` and ``patient_id`` to strings.
      4. Drop the ``home_id``, ``location_id`` and ``source`` columns.

    Parameters
    ----------
    activity_raw : pandas.DataFrame
        Must contain ``location_name``, ``patient_id``, ``home_id``,
        ``location_id`` and ``source`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is left untouched.

    Raises
    ------
    KeyError
        If one of the columns to drop is missing.
    """
    mapping = {
        'conservatory': 'conservatory',
        'WC1': 'wc',
        'corridor1': 'corridor',
        'living room': 'living',
        'study': 'study',
        'dining room': 'dining',
        'bathroom1': 'bathroom',
        'bedroom1': 'bedroom',
        'hallway': 'hallway',
        'lounge': 'lounge',
        'kitchen': 'kitchen',
        'cellar': 'cellar',
        'office': 'office',
    }
    excluded_rooms = ['cellar', 'office', 'dining', 'study', 'living',
                      'corridor', 'wc', 'conservatory']

    activity = activity_raw.copy()
    activity['location_name'] = activity['location_name'].map(mapping)

    # Labels missing from `mapping` come back as NaN.  They have to be removed
    # here: the string cast below would otherwise turn them into the literal
    # text 'nan' and keep them as if they were a real room.
    keep = activity['location_name'].notna() & ~activity['location_name'].isin(excluded_rooms)
    activity = activity[keep].copy()

    activity['location_name'] = activity['location_name'].values.astype('str')
    activity['patient_id'] = activity['patient_id'].values.astype('str')

    # Remove redundant columns (returns a new frame instead of mutating in place).
    return activity.drop(['home_id', 'location_id', 'source'], axis=1)

In [None]:
def pre_process_legacy(activity_raw):
    """Clean the LEGACY motion table.

    Steps:
      1. Translate the legacy location labels to the short room names used by
         the rest of the notebook.
      2. Discard rows whose room is in the exclusion list, and rows whose
         label is not present in the mapping at all.
      3. Cast ``location_name`` and ``patient_id`` to strings.
      4. Keep only ``start_date``, ``patient_id`` and ``location_name``
         (in that order).

    Parameters
    ----------
    activity_raw : pandas.DataFrame
        Must contain ``start_date``, ``patient_id`` and ``location_name``
        columns; any other column is discarded.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input frame is left untouched.
    """
    mapping = {
        'Hallway': 'hallway',
        'Kitchen': 'kitchen',
        'Study': 'study',
        'Bathroom': 'bathroom',
        'Lounge': 'lounge',
        'Bedroom': 'bedroom',
        'Living Room': 'living',
        'Front Door': 'door',
        'D': 'd',
        'Dining Room': 'dining',
    }
    excluded_rooms = ['study', 'living', 'door', 'd', 'dining']

    activity = activity_raw.copy()
    activity['location_name'] = activity['location_name'].map(mapping)

    # Labels missing from `mapping` come back as NaN.  They have to be removed
    # here: the string cast below would otherwise turn them into the literal
    # text 'nan' and keep them as if they were a real room.
    keep = activity['location_name'].notna() & ~activity['location_name'].isin(excluded_rooms)
    activity = activity[keep].copy()

    activity['location_name'] = activity['location_name'].values.astype('str')
    activity['patient_id'] = activity['patient_id'].values.astype('str')

    # Selecting the three wanted columns already discards everything else
    # (e.g. 'index', 'timezone'), so no separate drop is needed and the
    # function no longer fails when those extra columns are absent.
    return activity[['start_date', 'patient_id', 'location_name']]

In [None]:
def select_daytime_night(my_activity, my_time):
    """Keep only daytime or only night events and tag each with a ``day_date``.

    Parameters
    ----------
    my_activity : pandas.DataFrame
        Must contain a datetime ``start_date`` column.
    my_time : int
        0 keeps daytime events (hour of ``start_date`` between 6 and 17
        inclusive); 1 keeps night events (every other hour).

    Returns
    -------
    pandas.DataFrame
        A new frame with the selected rows and an added ``day_date`` column.
        For daytime this is the calendar day of ``start_date``.  For night,
        events before 06:00 are assigned to the previous calendar day, so one
        night spans 18:00-24:00 of a day plus 00:00-06:00 of the next one.
        The input frame is not modified.

    Raises
    ------
    ValueError
        If ``my_time`` is neither 0 nor 1.
    """
    if my_time == 0:
        print("Time: daytime")
        # The hour is computed as a standalone Series so that no helper column
        # is ever written into the caller's frame.
        hour = my_activity.start_date.dt.hour
        activity_select = my_activity[hour.between(6, 17)].copy()
        activity_select['day_date'] = activity_select.start_date.values.astype("datetime64[D]")

    elif my_time == 1:
        print("Time: night")
        hour = my_activity.start_date.dt.hour
        activity_select = my_activity[~hour.between(6, 17)].copy()
        # Early-morning events (before 06:00) belong to the night that started
        # the evening before: shift them back one day before truncating.
        before_six = activity_select.start_date.dt.hour < 6
        previous_day = activity_select.start_date + pd.Timedelta(days=-1)
        night_reference = activity_select.start_date.mask(before_six, previous_day)
        activity_select['day_date'] = night_reference.values.astype("datetime64[D]")

    else:
        raise ValueError("Error: please input correct number! daytime:0 (6:00-18:00), night:1 (18:00-6:00)")

    return activity_select

In [None]:
def select_time_step(my_activity, my_timestep):
    """Bring the activity table to the requested time resolution.

    Parameters
    ----------
    my_activity : pandas.DataFrame (or anything ``pd.DataFrame`` accepts)
        Needs ``patient_id`` plus ``day_date`` (modes 0 and 1) or
        ``start_date`` (modes 1, 2 and 3).
    my_timestep : int
        * 0 - per day: truncate ``day_date`` to whole days and keep only
          (patient, day) groups with more than 2 rows.
        * 1 - per week: replace ``day_date``/``start_date`` by a single
          ``start_date`` column holding the start of the week, and keep only
          (patient, week) groups with more than 2 rows.
        * 2 - per hour: truncate ``start_date`` to the hour.
        * 3 - accurate time: cast ``start_date`` to nanosecond resolution.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.

    Raises
    ------
    ValueError
        If ``my_timestep`` is not one of 0, 1, 2, 3.
    """
    # pd.DataFrame(frame) alone does not copy the data, so work on an explicit
    # copy to keep the column edits below away from the caller's frame.
    activity = pd.DataFrame(my_activity).copy()

    if my_timestep == 0:
        print("Timestep: per day")
        activity['day_date'] = activity['day_date'].values.astype("datetime64[D]")
        activity = activity.groupby(['patient_id', 'day_date']).filter(lambda x: len(x) > 2)

    elif my_timestep == 1:
        print("Timestep: per week")
        activity['week'] = activity['day_date'].dt.to_period('W').dt.start_time
        # Rename by label instead of overwriting the column list positionally:
        # a positional rename silently swaps columns if their order differs.
        activity = (
            activity
            .drop(['day_date', 'start_date'], axis=1)
            .rename(columns={'week': 'start_date'})
        )
        activity = activity.groupby(['patient_id', 'start_date']).filter(lambda x: len(x) > 2)

    elif my_timestep == 2:
        print("Timestep: per hour")
        activity['start_date'] = activity['start_date'].values.astype("datetime64[h]")

    elif my_timestep == 3:
        print("Accurate time")
        activity['start_date'] = activity['start_date'].values.astype("datetime64[ns]")

    else:
        raise ValueError("Error: please input correct number! per day: 0, per week: 1, per hour: 2, accurate time: 3")

    return activity

In [None]:
def get_von_nuemann_entropy(my_data, order=6):
    """Von Neumann entropy of the room-activity correlation structure.

    The five room columns (bathroom, bedroom, hallway, kitchen, lounge) of
    ``my_data`` are treated as variables and its rows as observations.  Their
    Pearson correlation matrix divided by its size (so that the trace is 1)
    is used as the density matrix ``rho`` and the function returns
    ``S = -trace(rho @ ln(rho))``.

    Parameters
    ----------
    my_data : pandas.DataFrame (or anything ``pd.DataFrame`` accepts)
        Per-observation counts.  Room columns that are missing, and missing
        values, are treated as 0.
    order : int or None, default 6
        Number of terms of the series
        ``ln(rho) ~ sum_{k=1..order} (-1)**(k+1) * (rho - I)**k / k``
        used to approximate the matrix logarithm.  The default reproduces the
        historical 6-term approximation.  ``None`` computes the entropy
        exactly from the eigenvalues of ``rho`` (``-sum(l * ln(l))`` over the
        positive eigenvalues).

    Returns
    -------
    numpy.float64
        The entropy.  NaN when the correlation matrix contains NaN, which is
        what ``np.corrcoef`` produces when a room column is constant.

    Raises
    ------
    ValueError
        If ``order`` is an integer smaller than 1.
    """
    rooms = ['bathroom', 'bedroom', 'hallway', 'kitchen', 'lounge']

    # Density matrix: correlation matrix (unit diagonal) scaled to trace 1.
    counts = pd.DataFrame(my_data, columns=rooms).fillna(0).to_numpy()
    pearson = np.corrcoef(counts, rowvar=False)
    density = pearson / len(pearson)

    if order is None:
        if not np.isfinite(density).all():
            return np.float64(np.nan)
        # rho is symmetric, so eigvalsh applies.  Zero (or round-off negative)
        # eigenvalues contribute nothing, by the convention 0 * ln(0) = 0.
        eigenvalues = np.linalg.eigvalsh(density)
        positive = eigenvalues[eigenvalues > 0]
        return -np.sum(positive * np.log(positive))

    if order < 1:
        raise ValueError("order must be a positive integer or None")

    # Truncated series of ln(rho) around the identity, evaluated on plain
    # ndarrays (np.matrix is no longer recommended by NumPy).
    shifted = density - np.eye(len(density))
    log_series = sum(
        (-1) ** (k + 1) * np.linalg.matrix_power(shifted, k) / k
        for k in range(1, order + 1)
    )
    return -np.trace(density @ log_series)

In [None]:
# Clean both sources.  Note that this rebinds activity_legacy / activity_raw
# to their cleaned versions, so the load cell has to be re-run before this
# cell is executed a second time.
activity_legacy = pre_process_legacy(activity_legacy)
activity_raw = pre_process_raw(activity_raw)

# Stack the two sources and order the events per patient chronologically.
activity = (
    pd.concat([activity_raw, activity_legacy], axis=0)
    .sort_values(['patient_id','start_date'])
    .reset_index(drop=True)
)

# Restrict to the configured part of the day, then to the configured time step.
activity = select_time_step(select_daytime_night(activity, time), timestep)

# The remaining cells group by `day_date`, which is only present for timestep 0.
activity.day_date = activity.day_date.values.astype('datetime64[D]')
activity.head()

In [None]:
# Events per (patient, day, room), pivoted to one column per room.
activity_count = (
    activity
    .groupby(['patient_id','day_date'])['location_name']
    .value_counts()
    .unstack()
    .reset_index()
)
activity_count.day_date = activity_count.day_date.values.astype("datetime64[D]")

# Tag every day with the start of its week and keep only the
# (patient, week) groups that contain more than 3 recorded days.
activity_count['week'] = activity_count['day_date'].dt.to_period('W').apply(lambda period: period.start_time)
activity_count = (
    activity_count
    .groupby(['patient_id','week'])
    .filter(lambda days: len(days) > 3)
    .fillna(0)  # a room with no events on a day counts as 0
)
activity_count.head()

In [None]:
# One entropy value per (patient, week), computed from that week's daily counts.
activity_count_s = activity_count.iloc[:]
activity_vn = (
    activity_count_s
    .groupby(['patient_id','week'])
    .apply(get_von_nuemann_entropy)
    .reset_index()
)
activity_vn.head()

In [None]:
# Name the entropy column explicitly.
activity_vn.columns = ['patient_id','week','activity_vn']

# Keep only patients that have more than 8 weekly entropy values.
activity_vn = (
    activity_vn
    .groupby(['patient_id'])
    .filter(lambda weeks: len(weeks) > 8)
)
activity_vn.head()

In [None]:
# Persist the weekly entropy table to the current working directory.
# NOTE(review): the file name says "daytime"; it is hard-coded and does not
# follow the `time` setting, so a night run (time = 1) would write to the same
# file - confirm this is intended.
# The DataFrame index is written as well (pandas' default for to_csv).
activity_vn.to_csv('c_activity_daytime_per_week_entropy_vn_frequency.csv')