Pre

In [1]:
from Funcs.Utility import *
import numpy as np
import pandas as pd
from typing import Dict, Callable, Union, Tuple, List, Optional, Iterable
from datetime import timedelta as td
from scipy import stats
import ray
import warnings
import time
import ray
import dask
import scipy.spatial.distance as dist
import math

New, modified functions based on Panyu's original D1 preprocessing

In [2]:
def _proc_app_usage(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Derive per-category app-usage durations and the category stream.

    Keeps only ``ACTIVITY_RESUMED`` / ``ACTIVITY_PAUSED`` rows, maps the raw
    category through ``transform`` (a mapping defined outside this block), and
    for each ``pcode`` computes the time elapsed between consecutive state
    changes.

    Args:
        data: Frame whose index has ``pcode`` and ``timestamp`` levels and
            which has ``type`` and ``category`` columns.

    Returns:
        A dict with one ``DUR_<category>`` float32 series (durations in
        seconds) per category counted, plus ``CAT`` holding the mapped
        category of every retained event as an object series.
    """
    # Filter the data to include only relevant types of app usage events
    # and blank out the category on ACTIVITY_PAUSED rows.
    data = data.loc[
        lambda x: x['type'].isin(['ACTIVITY_RESUMED', 'ACTIVITY_PAUSED']), :
    ].assign(
        category=lambda x: np.where(x['type'] == 'ACTIVITY_RESUMED', x['category'], None),
    )
    
    # Keep the raw value as 'subcategory'; 'category' becomes the mapped value
    # (values missing from `transform` are passed through unchanged).
    data = data.rename(columns={'category': 'subcategory'})
    data['category'] = [transform.get(item, item) for item in data['subcategory'].values]
    
    move = data
    Duration = []
    for pcode in data.index.get_level_values('pcode').unique():
        # One participant at a time, ordered by timestamp, with a 0..n-1 index.
        sub_move = move.loc[(pcode, ), :].sort_index(axis=0, level='timestamp').assign(pcode=pcode)
        sub_move = sub_move.reset_index()
        # 'move_state' is the type of the previous event; the first row is
        # treated as if it followed ACTIVITY_PAUSED.
        sub_move['move_state'] = sub_move['type'].shift().fillna('ACTIVITY_PAUSED')
        sub_move.loc[0, 'move_state'] = 'ACTIVITY_PAUSED'
        # Keep only rows where the state actually changed.
        sub_move = sub_move[sub_move['move_state'] != sub_move['type']]
        # NOTE(review): the integer row index is reinterpreted as datetimes
        # here, so the `.loc[0, 'duration']` assignment below presumably no
        # longer addresses the first row by label -- confirm this is intended.
        sub_move.index = pd.to_datetime(sub_move.index)
        # Time since the previous retained state change.
        sub_move['duration'] = sub_move['timestamp'] - sub_move['timestamp'].shift()
        sub_move.loc[0, 'duration'] = pd.Timedelta(0)
        # Drop zero-length rows (and the first row, whose duration is NaT).
        sub_move = sub_move[sub_move['duration'] > pd.Timedelta(0)]
        sub_move['duration_sec'] = sub_move['duration'].dt.total_seconds()
        # Self-assignment: has no effect on the column's values.
        sub_move['category'] = sub_move['category']
        Duration.append(sub_move)
    
    Duration = pd.concat(Duration, axis=0, ignore_index=True).set_index(
        ['pcode', 'timestamp']
    )
    # Keep intervals whose preceding event was ACTIVITY_RESUMED.
    # NOTE(review): after the state-change filter these rows have type
    # ACTIVITY_PAUSED, whose category was set to None above (unless
    # `transform` maps None) -- verify the DUR_* series are populated as
    # intended.
    Duration = Duration[Duration['move_state'] == 'ACTIVITY_RESUMED']
    
    # Categories present in the retained intervals (`_sup`, the counts, is unused).
    cnt = Duration['category'].value_counts()
    _val, _sup = cnt.index, cnt.values
    
    # One duration series per category.
    DUR = {'DUR_{}'.format(_k): Duration[Duration['category'] == '{}'.format(_k)]['duration_sec'].astype('float32') for _k in _val}
    
    # Category of every filtered event (not only the retained intervals).
    CAT = {'CAT': data['category'].astype('object')}
    
    Feature = {**DUR, **CAT}
    
    return Feature

def _proc_connectivity(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Return the connection type per row, or 'DISCONNECTED' when not connected.

    Args:
        data: Frame with ``isConnected`` and ``type`` columns.

    Returns:
        Object-dtype series named ``type``.
    """
    # Explicit comparison with True is kept so that only genuine True values
    # count as connected.
    is_connected = data['isConnected'] == True  # noqa: E712
    labels = np.where(is_connected, data['type'], 'DISCONNECTED')
    relabelled = data.assign(type=labels)
    return relabelled['type'].astype('object')

def _proc_battery(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Split battery events into level, status, temperature and plug type.

    Args:
        data: Frame with ``level``, ``status``, ``temperature`` and
            ``pluggedType`` columns.

    Returns:
        Dict with keys ``LEV`` (float32), ``STA`` (object), ``TMP`` (float32)
        and ``PLG`` (object).
    """
    level = data['level'].astype('float32')
    status = data['status'].astype('object')
    # Assuming temperature is reported in deci-degrees, hence the division by 10.
    temperature = (data['temperature'] / 10).astype('float32')
    plugged = data['pluggedType'].astype('object')

    return {'LEV': level, 'STA': status, 'TMP': temperature, 'PLG': plugged}


def _proc_call(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Extract call duration and contact type for calls that actually connected.

    Args:
        data: Frame with ``duration`` and ``contactType`` columns.

    Returns:
        Dict with ``DUR`` (float32 call duration) and ``CNT`` (object contact
        type), restricted to rows whose duration is greater than zero.
    """
    # Filter out calls with duration = 0
    answered = data.loc[data['duration'] > 0, :]

    # The former second `return` (based on `timesContacted`) was unreachable
    # and has been removed; the returned mapping is unchanged.
    return {
        'DUR': answered['duration'].astype('float32'),
        'CNT': answered['contactType'].astype('object')
    }

def _proc_data_traffic(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Expose the four traffic byte counters as float32 series.

    Args:
        data: Frame with ``rxBytes``, ``txBytes``, ``mobileRxBytes`` and
            ``mobileTxBytes`` columns.

    Returns:
        Dict keyed ``RCV``, ``SNT``, ``MRCV``, ``MSNT``.
    """
    # Output key -> source column, in the order the keys are emitted.
    source_columns = {
        'RCV': 'rxBytes',
        'SNT': 'txBytes',
        'MRCV': 'mobileRxBytes',
        'MSNT': 'mobileTxBytes',
    }
    return {
        key: data[column].astype('float32')
        for key, column in source_columns.items()
    }

def _proc_ringer_mode(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Return the ringer-mode ``type`` column as an object-dtype series."""
    ringer_type = data['type']
    return ringer_type.astype(object)

def _proc_screen(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Extract screen events and the time between consecutive state changes.

    Args:
        data: Frame whose index has ``pcode`` and ``timestamp`` levels and
            which has a ``type`` column.

    Returns:
        Dict with ``EVENT`` (object series of the retained event types) and
        ``DUR`` (float32 series of durations in seconds between retained
        state changes).
    """
    # Only these three event types are considered.
    screen_events = data[data['type'].isin(['SCREEN_ON', 'SCREEN_OFF', 'USER_PRESENT'])]
    
    Duration = []
    for pcode in screen_events.index.get_level_values('pcode').unique():
        # One participant at a time, ordered by timestamp, with a 0..n-1 index.
        sub_screen = screen_events.loc[(pcode, ), :].sort_index(axis=0, level='timestamp').assign(pcode=pcode)
        sub_screen = sub_screen.reset_index()
        # 'screen_state' is the type of the previous event; the first row is
        # treated as if it followed SCREEN_OFF.
        sub_screen['screen_state'] = sub_screen['type'].shift().fillna('SCREEN_OFF')
        sub_screen.loc[0, 'screen_state'] = 'SCREEN_OFF'
        # Keep only rows where the event type differs from the previous one.
        sub_screen = sub_screen[sub_screen['screen_state'] != sub_screen['type']]
        # NOTE(review): the integer row index is reinterpreted as datetimes
        # here, so the `.loc[0, 'duration']` assignment below presumably no
        # longer addresses the first row by label -- confirm this is intended.
        sub_screen.index = pd.to_datetime(sub_screen.index)
        # Time since the previous retained state change.
        sub_screen['duration'] = sub_screen['timestamp'] - sub_screen['timestamp'].shift()
        sub_screen.loc[0, 'duration'] = pd.Timedelta(0)
        # Drop zero-length rows (and the first row, whose duration is NaT).
        sub_screen = sub_screen[sub_screen['duration'] > pd.Timedelta(0)]
        sub_screen['duration_sec'] = sub_screen['duration'].dt.total_seconds()
        Duration.append(sub_screen)
    
    Duration = pd.concat(Duration, axis=0, ignore_index=True).set_index(['pcode', 'timestamp'])
    
    return {
        'EVENT': screen_events['type'].astype('object'),
        'DUR': Duration['duration_sec'].astype('float32')
    }

def _proc_on_off(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Return the on/off event ``type`` column as an object-dtype series."""
    event_type = data['type']
    as_object = event_type.astype(object)
    return as_object

def _proc_power_save(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Return the power-save event ``type`` column as an object-dtype series."""
    power_save_type = data['type']
    return power_save_type.astype(object)

def _proc_charge(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Return the charge event ``type`` column as an object-dtype series."""
    charge_type = data['type']
    return charge_type.astype(object)


import math
import numpy as np
import pandas as pd
from poi import PoiCluster

# Circle around the KAIST main campus: centre (latitude, longitude) and radius.
center_lat_kaist = 36.3722
center_lon_kaist = 127.3600
_radius_kaist = 1000  # meters

# Circle around the KAIST Munji campus: centre (latitude, longitude) and radius.
center_lat_munji = 36.391944
center_lon_munji = 127.398611
_radius_munji = 400  # meters

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in metres between two points.

    Arguments are in degrees, ordered ``(lat1, lon1, lat2, lon2)``. Uses the
    haversine formula with a sphere radius of 6 371 000 m.
    """
    earth_radius_m = 6371000  # meters
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2
    # Haversine of the central angle between the two points.
    hav = math.sin(half_dlat) ** 2 + \
        math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(half_dlon) ** 2
    central_angle = 2 * math.atan2(math.sqrt(hav), math.sqrt(1 - hav))
    return earth_radius_m * central_angle

def _haversine(_lat1, _lat2, _lng1, _lng2) -> float:
    """Return the haversine distance between two points, or 0.0 if any input is NaN.

    Arguments are in degrees and ordered ``(lat1, lat2, lng1, lng2)`` -- note
    that both latitudes come first. The sphere radius used is 6 371 008.8.
    """
    degrees = (_lat1, _lat2, _lng1, _lng2)
    if any(np.isnan(value) for value in degrees):
        return 0.0

    lat_a, lat_b, lng_a, lng_b = (np.radians(value) for value in degrees)
    d_lat = lat_b - lat_a
    d_lng = lng_b - lng_a
    sphere_radius = 6371008.8
    hav = np.sin(d_lat * 0.5) ** 2 + np.cos(lat_a) * np.cos(lat_b) * np.sin(d_lng * 0.5) ** 2
    return 2 * sphere_radius * np.arcsin(np.sqrt(hav))

def midpoint(group):
    """Return the mean latitude and longitude of ``group`` as a two-entry series."""
    mean_lat = group['latitude'].mean()
    mean_lon = group['longitude'].mean()
    return pd.Series([mean_lat, mean_lon], index=['latitude', 'longitude'])

def label_cluster(group, radius):
    """Label each row of a cluster by its distance from the cluster midpoint.

    The midpoint is read from the first row's ``mid_latitude`` /
    ``mid_longitude``. A row is labelled ``'home'`` when its
    (``latitude``, ``longitude``) lies within ``radius`` (same unit as
    ``haversine`` returns) of that midpoint, otherwise ``'work'``.

    Args:
        group: Frame with ``latitude``, ``longitude``, ``mid_latitude`` and
            ``mid_longitude`` columns; a ``label`` column is written to it.
        radius: Distance threshold.

    Returns:
        The ``label`` column of ``group``.
    """
    center_lat = group['mid_latitude'].iloc[0]
    center_lon = group['mid_longitude'].iloc[0]
    # `haversine` takes (lat1, lon1, lat2, lon2). The previous call passed
    # (lat, center_lat, lon, center_lon), which mixed latitudes with longitudes.
    group['label'] = group.apply(
        lambda row: 'home' if haversine(row['latitude'], row['longitude'], center_lat, center_lon) < radius else 'work',
        axis=1
    )
    return group['label']

def _proc_location(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Cluster location fixes per participant and label them home/work/none.

    For each ``pcode`` the fixes are clustered with ``PoiCluster``; the
    cluster with the most night-time duration becomes ``home`` and the
    cluster (other than home) with the most weekday daytime duration becomes
    ``work``. Fixes in the ``'NONE'`` cluster are labelled ``none``.

    Args:
        data: Frame whose index has ``pcode`` and ``timestamp`` levels and
            which has ``latitude``, ``longitude`` and ``accuracy`` columns.

    Returns:
        Dict with ``LABEL`` (object series of labels) and ``DST`` (float32
        series of the ``_haversine`` distance to the previous fix).
    """
    new_data = []
    # Parameters handed to PoiCluster below.
    DISTANCE_MAX_IN_METRE = 100
    REGION_SIZE_IN_METRE = 250
    MAXIMUM_TIME_IN_MIN = 60
    MINIMUM_TIME_IN_MIN = 5

    for pcode in data.index.get_level_values('pcode').unique():
        # Per participant, in time order: attach the previous fix's
        # coordinates and the distance from it to the current fix.
        sub = data.loc[(pcode,), :].sort_index(axis=0, level='timestamp').assign(
            _latitude=lambda x: x['latitude'].shift(1),
            _longitude=lambda x: x['longitude'].shift(1),
            dist=lambda x: x.apply(
                lambda y: _haversine(y['latitude'], y['_latitude'], y['longitude'], y['_longitude']),
                axis=1
            ),
            pcode=pcode
        ).reset_index()

        # Timestamps as integer epoch milliseconds for the clustering step.
        sub['timestamp'] = sub['timestamp'].apply(lambda x: int(pd.Timestamp(x).timestamp() * 1000))
        # Keep only fixes whose 'accuracy' value is below 100.
        sub = sub[sub['accuracy'] < 100]

        # Coordinates are passed to PoiCluster in radians; time limits are
        # converted from minutes to milliseconds.
        latlon_rad = np.radians(sub.loc[:, ['latitude', 'longitude']].to_numpy())
        timestamps = sub.loc[:, 'timestamp'].values
        cluster = PoiCluster(
            d_max=DISTANCE_MAX_IN_METRE, r_max=REGION_SIZE_IN_METRE, t_max=MAXIMUM_TIME_IN_MIN * 60 * 1000,
            t_min=MINIMUM_TIME_IN_MIN * 60 * 1000
        ).fit(X=latlon_rad, timestamps=timestamps)
        labels = cluster.predict(X=latlon_rad)
        sub = sub.assign(cluster=labels)

        # Empty-string cluster labels become the pseudo-cluster 'NONE'.
        # NOTE(review): this is an in-place replace on a selected column; it
        # may not propagate to `sub` under pandas copy-on-write -- confirm.
        sub['cluster'].replace('', 'NONE', inplace=True)
        # NOTE(review): the groupby result is indexed by cluster label while
        # `sub` is indexed by row; the assignment presumably aligns on index
        # -- verify that mid_latitude/mid_longitude hold the intended values.
        sub[['mid_latitude', 'mid_longitude']] = sub.groupby('cluster', group_keys=False).apply(midpoint).apply(pd.Series)
        # Timezone-aware copy of the timestamp for calendar features.
        sub = sub.assign(
            _timestamp=lambda x: pd.to_datetime(x['timestamp'], unit='ms', utc=True).dt.tz_convert(DEFAULT_TZ)
        )
        sub = sub.sort_values(by='_timestamp')
        sub['day_of_week'] = sub['_timestamp'].dt.dayofweek
        sub['hour_of_day'] = sub['_timestamp'].dt.hour
        # 1 for hours 9..17, else 0.
        sub['day_or_night'] = sub['_timestamp'].apply(lambda x: 1 if 9 <= x.hour < 18 else 0)
        # 1 for dayofweek 0..4, else 0.
        sub['wkday_or_wkend'] = sub['_timestamp'].apply(lambda x: 1 if x.dayofweek <= 4 else 0)
        # Seconds elapsed since the previous fix.
        sub['duration'] = sub['_timestamp'].diff().apply(lambda x: x.total_seconds() if isinstance(x, pd.Timedelta) else x)
        # Replaces every remaining NaN in the frame with 0 (e.g. the first duration).
        sub = sub.fillna(0)
        # home: cluster with the largest summed duration where day_or_night == 0.
        # NOTE(review): the 'NONE' pseudo-cluster is not excluded from the
        # home/work candidates; the later 'none' assignment overrides its label.
        home = sub[sub['day_or_night'] == 0]['duration'].groupby(sub['cluster']).sum().idxmax()
        # work: cluster other than home with the largest summed weekday daytime duration.
        work = sub[sub['day_or_night'] == 1][sub['wkday_or_wkend'] == 1][sub['cluster'] != home]['duration'].groupby(sub['cluster']).sum().idxmax()
        condition_home = sub['cluster'] == home
        condition_work = sub['cluster'] == work
        condition_none = sub['cluster'] == 'NONE'
        sub.loc[condition_home, 'label'] = 'home'
        sub.loc[condition_work, 'label'] = 'work'
        sub.loc[condition_none, 'label'] = 'none'

        # Label the still-unlabelled rows via label_cluster.
        # NOTE(review): this reassigns the whole 'label' column from a result
        # computed only on the masked rows, which presumably clears the labels
        # set above -- hence the repeated home/work/none assignment below.
        radius = 100
        mask = sub['label'].isna()
        sub['label'] = sub[mask].groupby('cluster', group_keys=False).apply(lambda x: label_cluster(x, radius)).apply(pd.Series)

        condition_home = sub['cluster'] == home
        condition_work = sub['cluster'] == work
        condition_none = sub['cluster'] == 'NONE'
        sub.loc[condition_home, 'label'] = 'home'
        sub.loc[condition_work, 'label'] = 'work'
        sub.loc[condition_none, 'label'] = 'none'

        # Clusters labelled 'others' whose mean position lies inside either
        # campus circle are relabelled 'work'.
        # NOTE(review): no code visible here assigns the label 'others', so
        # `centers` is presumably empty -- confirm.
        centers = sub[sub['label'] == 'others'].groupby('cluster').mean()
        distances_kaist = centers.apply(lambda row: haversine(center_lat_kaist, center_lon_kaist, row['latitude'], row['longitude']), axis=1)
        distances_munji = centers.apply(lambda row: haversine(center_lat_munji, center_lon_munji, row['latitude'], row['longitude']), axis=1)

        in_circle_kaist = distances_kaist <= _radius_kaist
        cluster_centers_in_circle_kaist = centers[in_circle_kaist]

        in_circle_munji = distances_munji <= _radius_munji
        cluster_centers_in_circle_munji = centers[in_circle_munji]

        cluster_centers_in_circle = pd.concat([cluster_centers_in_circle_kaist, cluster_centers_in_circle_munji])

        condition_work_other = sub['cluster'].isin(cluster_centers_in_circle.index)
        sub.loc[condition_work_other, 'label'] = 'work'

        new_data.append(sub)

    # Stack all participants and restore a timezone-aware (pcode, timestamp) index.
    new_data = pd.concat(new_data, axis=0, ignore_index=True)
    new_data['timestamp'] = pd.to_datetime(new_data['timestamp'], unit='ms', utc=True).dt.tz_convert(DEFAULT_TZ)
    new_data = new_data.set_index(['pcode', 'timestamp'])


    return {
        'LABEL': new_data['label'].astype('object'),
        'DST': new_data['dist'].astype('float32')
    }



def _proc_wifi(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Compute similarity between consecutive WiFi scans per participant.

    Access points are identified by ``"<address>-<frequency>"``. For every
    pair of consecutive scan timestamps the RSSI vectors of the access points
    seen in both scans are compared.

    Args:
        data: Frame whose index has ``pcode`` and ``timestamp`` levels and
            which has ``address``, ``frequency`` and ``rssi`` columns.

    Returns:
        Dict of float32 series indexed by (pcode, timestamp of the later
        scan): ``COS`` (1 - cosine distance), ``EUC`` and ``MAN``
        (1 / (1 + weighted distance)) and ``JAC`` (shared / all access
        points). ``COS``, ``EUC`` and ``MAN`` are 0 when the scans share no
        access point. All series are empty when no participant has two scans.
    """
    rows = []

    for pcode in data.index.get_level_values('pcode').unique():
        sub = data.loc[(pcode, ), :].sort_index(axis=0, level='timestamp').assign(
            bssid=lambda x: x['address'].str.cat(x['frequency'].astype(str), sep='-')
        )
        t = sub.index.unique().array
        # Pair every scan timestamp with the one before it; the first scan
        # has no predecessor (NaT) and is skipped.
        for cur_t, prev_t in zip(t, t.shift(1)):
            if cur_t is pd.NaT or prev_t is pd.NaT:
                continue

            prev = sub.loc[[prev_t], :]
            cur = sub.loc[[cur_t], :]
            intersect = np.intersect1d(prev['bssid'], cur['bssid'])
            union = np.union1d(prev['bssid'], cur['bssid'])

            if len(intersect) > 0:
                # Equal weights summing to one over the shared access points.
                w = np.repeat(1 / len(intersect), len(intersect))
                # Sort both sides by bssid so the RSSI vectors line up.
                prev_rssi = prev.loc[prev['bssid'].isin(intersect), :].sort_values('bssid')['rssi']
                cur_rssi = cur.loc[cur['bssid'].isin(intersect), :].sort_values('bssid')['rssi']
                cosine = 1 - dist.cosine(prev_rssi, cur_rssi)
                euclidean = 1 / (1 + dist.euclidean(prev_rssi, cur_rssi, w))
                manhattan = 1 / (1 + dist.cityblock(prev_rssi, cur_rssi, w))
            else:
                cosine = euclidean = manhattan = 0

            rows.append(dict(
                pcode=pcode,
                timestamp=cur_t,
                cosine=cosine,
                euclidean=euclidean,
                manhattan=manhattan,
                jaccard=len(intersect) / len(union) if len(union) > 0 else 0
            ))

    # Explicit columns keep `set_index` working when `rows` is empty; without
    # them an empty frame has no 'pcode'/'timestamp' columns and raises KeyError.
    new_data = pd.DataFrame(
        rows,
        columns=['pcode', 'timestamp', 'cosine', 'euclidean', 'manhattan', 'jaccard']
    ).set_index(['pcode', 'timestamp'])

    return {
        'COS': new_data['cosine'].astype('float32'),
        'EUC': new_data['euclidean'].astype('float32'),
        'MAN': new_data['manhattan'].astype('float32'),
        'JAC': new_data['jaccard'].astype('float32')
    }

def _proc_installed_app(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Compute the Jaccard similarity of installed apps between consecutive snapshots.

    Args:
        data: Frame whose index has ``pcode`` and ``timestamp`` levels and
            which has a ``packageName`` column.

    Returns:
        Dict with ``JAC``: a float32 series indexed by (pcode, timestamp of
        the later snapshot) holding shared / all package names. The series is
        empty when no participant has two snapshots.
    """
    rows = []

    for pcode in data.index.get_level_values('pcode').unique():
        sub = data.loc[(pcode, ), :].sort_index(axis=0, level='timestamp')
        t = sub.index.unique().array
        # Pair every snapshot timestamp with the one before it; the first
        # snapshot has no predecessor (NaT) and is skipped.
        for cur_t, prev_t in zip(t, t.shift(1)):
            if cur_t is pd.NaT or prev_t is pd.NaT:
                continue

            prev = sub.loc[[prev_t], :]
            cur = sub.loc[[cur_t], :]
            intersect = np.intersect1d(prev['packageName'], cur['packageName'])
            union = np.union1d(prev['packageName'], cur['packageName'])
            rows.append(dict(
                pcode=pcode,
                timestamp=cur_t,
                jaccard=len(intersect) / len(union) if len(union) > 0 else 0
            ))

    # Explicit columns keep `set_index` working when `rows` is empty; without
    # them an empty frame has no 'pcode'/'timestamp' columns and raises KeyError.
    new_data = pd.DataFrame(
        rows, columns=['pcode', 'timestamp', 'jaccard']
    ).set_index(['pcode', 'timestamp'])

    return {
       'JAC': new_data['jaccard'].astype('float32')
    }

def _proc_media_event(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Emit unit-valued event series for video, image and all media rows.

    Args:
        data: Frame whose index has ``pcode`` and ``timestamp`` levels and
            which has a ``mimetype`` column.

    Returns:
        Dict with ``VID`` (mimetype starts with 'video'), ``IMG`` (starts
        with 'image') and ``ALL`` (every row); each is a float32 series of
        ones indexed by (pcode, timestamp).
    """
    # Output key -> row selector, in the order the keys are emitted.
    selectors = {
        'VID': lambda frame: frame.loc[frame['mimetype'].str.startswith('video'), :],
        'IMG': lambda frame: frame.loc[frame['mimetype'].str.startswith('image'), :],
        'ALL': lambda frame: frame,
    }
    collected = defaultdict(list)

    for pcode in data.index.get_level_values('pcode').unique():
        ordered = data.loc[(pcode, ), :].sort_index(axis=0, level='timestamp')
        for key, select in selectors.items():
            marked = select(ordered).assign(event=1, pcode=pcode).reset_index()
            collected[key].append(marked)

    result = {}
    for key, frames in collected.items():
        stacked = pd.concat(frames, axis=0, ignore_index=True)
        stacked = stacked.set_index(['pcode', 'timestamp'])
        result[key] = stacked['event'].astype('float32')
    return result

def _proc_message_event(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Emit unit-valued event series for sent, received and all messages.

    Args:
        data: Frame whose index has ``pcode`` and ``timestamp`` levels and
            which has a ``messageBox`` column.

    Returns:
        Dict with ``SNT`` (messageBox == 'SENT'), ``RCV`` (messageBox ==
        'INBOX') and ``ALL`` (every row); each is a float32 series of ones
        indexed by (pcode, timestamp).
    """
    # Output key -> row selector, in the order the keys are emitted.
    selectors = {
        'SNT': lambda frame: frame.loc[frame['messageBox'] == 'SENT', :],
        'RCV': lambda frame: frame.loc[frame['messageBox'] == 'INBOX', :],
        'ALL': lambda frame: frame,
    }
    collected = defaultdict(list)

    for pcode in data.index.get_level_values('pcode').unique():
        ordered = data.loc[(pcode, ), :].sort_index(axis=0, level='timestamp')
        for key, select in selectors.items():
            marked = select(ordered).assign(event=1, pcode=pcode).reset_index()
            collected[key].append(marked)

    result = {}
    for key, frames in collected.items():
        stacked = pd.concat(frames, axis=0, ignore_index=True)
        stacked = stacked.set_index(['pcode', 'timestamp'])
        result[key] = stacked['event'].astype('float32')
    return result


def _proc_calories(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Return the non-missing calorie values, time-ordered within each participant.

    Args:
        data: Frame whose index has ``pcode`` and ``timestamp`` levels and
            which has a ``value`` column.

    Returns:
        Float32 series named ``calories`` indexed by (pcode, timestamp), with
        missing values dropped.
    """
    pcodes = data.index.get_level_values('pcode').unique()
    per_participant = [
        data.loc[(pcode, ), :]
        .sort_index(axis=0, level='timestamp')
        .assign(calories=lambda frame: frame['value'], pcode=pcode)
        .reset_index()
        for pcode in pcodes
    ]

    stacked = pd.concat(per_participant, axis=0, ignore_index=True)
    stacked = stacked.set_index(['pcode', 'timestamp'])
    calories = stacked['calories']
    return calories.dropna().astype('float32')

def _proc_bluetooth_scan(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Pass through the bond state, device type and class type columns.

    Args:
        data: Frame with ``bondState``, ``deviceType`` and ``classType`` columns.

    Returns:
        Dict keyed ``BondState``, ``DeviceType`` and ``classType``, each an
        object-dtype series of the corresponding column, unmodified.
    """
    # Output key -> source column, in the order the keys are emitted.
    source_columns = {
        'BondState': 'bondState',
        'DeviceType': 'deviceType',
        'classType': 'classType',
    }
    return {
        key: data[column].astype('object')
        for key, column in source_columns.items()
    }

def _proc_notification_event(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Return notification visibility (``VIS``) and category (``CAT``) as object series."""
    visibility = data['visibility'].astype('object')
    category = data['category'].astype('object')
    return {'VIS': visibility, 'CAT': category}


In [3]:
def _load_data(name: str) -> Optional[pd.DataFrame]:
    """Load ``<name>.csv`` from every participant directory and stack the rows.

    Participant directories are the entries of ``PATH_SENSOR`` whose name
    starts with ``'P'``; the directory name is used as the ``pcode``.

    Args:
        name: CSV file stem to look for in each participant directory.

    Returns:
        A frame indexed by (pcode, timestamp), with ``timestamp`` parsed from
        epoch milliseconds and converted to ``DEFAULT_TZ``. An empty frame is
        returned when no file exists or every file found has no rows.
    """
    paths = [
        (d, os.path.join(PATH_SENSOR, d, f'{name}.csv'))
        for d in os.listdir(PATH_SENSOR)
        if d.startswith('P')
    ]

    # Debugging: Print the paths being checked
    print(f"Paths for {name}: {paths}")

    valid_paths = [
        (pcode, p) for pcode, p in paths if os.path.exists(p)
    ]

    # Debugging: Print the valid paths found
    print(f"Valid paths for {name}: {valid_paths}")

    if not valid_paths:
        print(f"No valid paths found for {name}")
        return pd.DataFrame()  # Return an empty DataFrame if no valid paths

    # Read every file, then drop the ones without rows.
    frames = [pd.read_csv(p).assign(pcode=pcode) for pcode, p in valid_paths]
    frames = [frame for frame in frames if len(frame.index)]

    # `pd.concat` raises ValueError on an empty sequence, so handle the case
    # where every CSV was empty the same way as "no files found".
    if not frames:
        print(f"No rows found for {name}")
        return pd.DataFrame()

    return pd.concat(
        frames, ignore_index=True
    ).assign(
        timestamp=lambda x: pd.to_datetime(x['timestamp'], unit='ms', utc=True).dt.tz_convert(DEFAULT_TZ)
    ).set_index(
        ['pcode', 'timestamp']
    )

def _proc_dozemode_event(data: pd.DataFrame) -> pd.Series:
    """Return the doze-mode ``type`` values ordered by ``timestamp`` as an object series."""
    # Order the events chronologically before extracting the type.
    chronological = data.sort_values('timestamp')
    event_type = chronological['type']
    return event_type.astype(object)

def _proc_fitbit_heartrate(data: pd.DataFrame) -> Union[pd.Series, Dict[str, pd.Series]]:
    """Return the heart-rate ``value`` column as a float32 series."""
    heart_rate = data['value']
    return heart_rate.astype('float32')


In [4]:
# Raw data-type name -> short prefix used for the produced feature names.
# Insertion order is the order in which the types are iterated.
DATA_TYPES = dict([
    ('Fitness-Calorie', 'CAL'),
    ('AppUsageEvent', 'APP'),
    ('BatteryEvent', 'BAT'),
    ('CallEvent', 'CALL'),
    ('DataTraffic', 'DATA'),
    ('InstalledApp', 'INST'),
    ('Location', 'LOC'),
    ('MessageEvent', 'MSG'),
    ('WiFiScan', 'WIFI'),
    ('ScreenEvent', 'SCR'),
    ('RingerModeEvent', 'RING'),
    ('ChargeEvent', 'CHG'),
    ('PowerSaveEvent', 'PWR'),
    # ('OnOffEvent', 'ONOFF'),  # currently disabled
    ('BluetoothScan', 'BT'),
    ('DozeModeEvent', 'Dozemode'),
    ('Fitbit-HeartRate', 'Heartrate'),
    ('NotificationEvent', 'Notification'),
])

In [5]:
import os
import pandas as pd
import gc
from functools import reduce
import warnings
from pandas.errors import PerformanceWarning
import ray

# Silence pandas PerformanceWarning and all RuntimeWarning messages.
warnings.simplefilter('ignore', PerformanceWarning)
warnings.simplefilter('ignore', RuntimeWarning)

In [6]:
# Data-type name -> processing function. Keys must match the keys of
# DATA_TYPES, because `_process` looks the function up by that name.
FUNC_PROC = {
    'Fitness-Calorie': _proc_calories,
    'AppUsageEvent': _proc_app_usage,
    'BatteryEvent': _proc_battery,
    'CallEvent': _proc_call,
    'DataTraffic': _proc_data_traffic,
    'InstalledApp': _proc_installed_app,
    'Location': _proc_location,
    'MessageEvent': _proc_message_event,
    # DATA_TYPES spells this key 'WiFiScan'; the previous 'WifiScan' spelling
    # made the lookup fail with KeyError. The old spelling is kept as an alias.
    'WiFiScan': _proc_wifi,
    'WifiScan': _proc_wifi,
    'ScreenEvent': _proc_screen,
    'RingerModeEvent': _proc_ringer_mode,
    'ChargeEvent': _proc_charge,
    'PowerSaveEvent': _proc_power_save,
#     'OnOffEvent': _proc_on_off,
    'BluetoothScan': _proc_bluetooth_scan,
    'DozeModeEvent': _proc_dozemode_event,
    'Fitbit-HeartRate': _proc_fitbit_heartrate,
    'NotificationEvent': _proc_notification_event
}

In [7]:
import os
import pandas as pd
import gc
from typing import Optional, Union, Dict
import numpy as np
import ray
from collections import defaultdict
from poi import PoiCluster 

def _process(data_type: str):
    """Load and process one data type, returning its named feature series.

    Args:
        data_type: A key of ``DATA_TYPES`` (and of ``FUNC_PROC``).

    Returns:
        Dict mapping feature names to the processed series. When the
        processing function returns a dict, each key ``k`` becomes
        ``'<abbrev>_<k>'``; otherwise the single result is stored under
        ``'<abbrev>'``. An empty dict is returned when no data was loaded.

    Raises:
        KeyError: If ``data_type`` is missing from ``DATA_TYPES``, or data was
            loaded but ``FUNC_PROC`` has no entry for it.
    """
    log(f'Begin to processing data: {data_type}')

    abbrev = DATA_TYPES[data_type]
    data_raw = _load_data(data_type)

    # `_load_data` is annotated Optional, so treat None like an empty frame.
    if data_raw is None or data_raw.empty:
        print(f"No data loaded for {data_type}")
        log(f"No data loaded for {data_type}")
        return {}

    data_proc = FUNC_PROC[data_type](data_raw)

    # isinstance also accepts dict subclasses such as defaultdict.
    if isinstance(data_proc, dict):
        result = {f'{abbrev}_{k}': v for k, v in data_proc.items()}
    else:
        result = {abbrev: data_proc}

    log(f'Complete processing data: {data_type}')
    return result


In [8]:
# Process every data type as a Ray remote task, merge the per-type result
# dicts into one mapping and dump it to 'proc.pkl' under PATH_INTERMEDIATE.
# `on_ray` and `dump` are defined outside this cell (presumably in
# Funcs.Utility -- confirm).
with on_ray():
    jobs = []
    
    # Wrap `_process` as a Ray remote function; calling `func` submits a task.
    func = ray.remote(_process).remote
    
    # Submit one task per data type.
    for data_type in DATA_TYPES:
        job = func(data_type)
        jobs.append(job)

    # Wait for all tasks and collect their result dicts.
    jobs = ray.get(jobs)
    # Merge the dicts; on duplicate keys the later dict wins.
    jobs = reduce(lambda a, b: {**a, **b}, jobs)
    dump(jobs, os.path.join(PATH_INTERMEDIATE, 'proc.pkl'))

    # Drop the merged results and trigger a garbage-collection pass.
    del jobs
    gc.collect()

2024-07-25 04:44:46,459	INFO worker.py:1612 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


[2m[36m(_process pid=2282678)[0m [24-07-25 04:44:48] Begin to processing data: DataTraffic
[2m[36m(_process pid=2282689)[0m [24-07-25 04:44:48] Begin to processing data: Fitness-Calorie
[2m[36m(_process pid=2282685)[0m [24-07-25 04:44:48] Begin to processing data: PowerSaveEvent
[2m[36m(_process pid=2282679)[0m [24-07-25 04:44:48] Begin to processing data: MessageEvent
[2m[36m(_process pid=2282683)[0m [24-07-25 04:44:48] Begin to processing data: Location
[2m[36m(_process pid=2282687)[0m [24-07-25 04:44:48] Begin to processing data: DozeModeEvent
[2m[36m(_process pid=2282687)[0m Paths for DozeModeEvent: [('P126', '/var/nfs_share/D#4/newdata/P126/DozeModeEvent.csv'), ('P041', '/var/nfs_share/D#4/newdata/P041/DozeModeEvent.csv'), ('P008', '/var/nfs_share/D#4/newdata/P008/DozeModeEvent.csv'), ('P026', '/var/nfs_share/D#4/newdata/P026/DozeModeEvent.csv'), ('P065', '/var/nfs_share/D#4/newdata/P065/DozeModeEvent.csv'), ('P124', '/var/nfs_share/D#4/newdata/P124/DozeModeEv

[2m[36m(_process pid=2282687)[0m Valid paths for DozeModeEvent: [('P126', '/var/nfs_share/D#4/newdata/P126/DozeModeEvent.csv'), ('P041', '/var/nfs_share/D#4/newdata/P041/DozeModeEvent.csv'), ('P008', '/var/nfs_share/D#4/newdata/P008/DozeModeEvent.csv'), ('P026', '/var/nfs_share/D#4/newdata/P026/DozeModeEvent.csv'), ('P065', '/var/nfs_share/D#4/newdata/P065/DozeModeEvent.csv'), ('P124', '/var/nfs_share/D#4/newdata/P124/DozeModeEvent.csv'), ('P116', '/var/nfs_share/D#4/newdata/P116/DozeModeEvent.csv'), ('P123', '/var/nfs_share/D#4/newdata/P123/DozeModeEvent.csv'), ('P091', '/var/nfs_share/D#4/newdata/P091/DozeModeEvent.csv'), ('P040', '/var/nfs_share/D#4/newdata/P040/DozeModeEvent.csv'), ('P038', '/var/nfs_share/D#4/newdata/P038/DozeModeEvent.csv'), ('P078', '/var/nfs_share/D#4/newdata/P078/DozeModeEvent.csv'), ('P061', '/var/nfs_share/D#4/newdata/P061/DozeModeEvent.csv'), ('P043', '/var/nfs_share/D#4/newdata/P043/DozeModeEvent.csv'), ('P075', '/var/nfs_share/D#4/newdata/P075/DozeMode

[2m[36m(_process pid=2282687)[0m [24-07-25 04:44:50] Complete processing data: DozeModeEvent






[2m[36m(_process pid=2282689)[0m [24-07-25 04:44:51] Complete processing data: Fitness-Calorie
[2m[36m(_process pid=2282676)[0m [24-07-25 04:44:54] Complete processing data: CallEvent
[2m[36m(_process pid=2282677)[0m [24-07-25 04:44:54] Complete processing data: RingerModeEvent
[2m[36m(_process pid=2282685)[0m [24-07-25 04:44:54] Complete processing data: PowerSaveEvent
[2m[36m(_process pid=2282684)[0m [24-07-25 04:44:54] Complete processing data: ChargeEvent




[2m[36m(_process pid=2282679)[0m [24-07-25 04:44:55] Complete processing data: MessageEvent




[2m[36m(_process pid=2282688)[0m [24-07-25 04:44:58] Complete processing data: ScreenEvent














[2m[36m(_process pid=2282683)[0m [24-07-25 04:45:25] Complete processing data: Location
[2m[36m(_process pid=2282682)[0m [24-07-25 04:45:26] Complete processing data: BatteryEvent
[2m[36m(_process pid=2282678)[0m [24-07-25 04:46:09] Complete processing data: DataTraffic




[2m[36m(_process pid=2282680)[0m [24-07-25 04:46:11] Complete processing data: Fitbit-HeartRate




[2m[36m(_process pid=2282675)[0m [24-07-25 04:46:19] Complete processing data: BluetoothScan




[2m[36m(_process pid=2282674)[0m [24-07-25 04:46:32] Complete processing data: NotificationEvent




[2m[36m(_process pid=2282681)[0m [24-07-25 04:46:43] Complete processing data: InstalledApp
[2m[36m(_process pid=2282686)[0m [24-07-25 04:46:47] Complete processing data: AppUsageEvent


