# CASAS Smart Home ADS Coursework

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import datetime
import os
import re
import seaborn as sns
from collections import Counter
from datetime import datetime



In [4]:
# Sensor identifiers kept by the loader: M001-M030, D001-D004 and T001-T005,
# in that order. Generated from (prefix, count) ranges instead of being typed out.
# NOTE(review): presumably motion / door / temperature sensors - confirm against
# the dataset documentation.
sensor_codes = (
    ['M{:03d}'.format(number) for number in range(1, 31)]
    + ['D{:03d}'.format(number) for number in range(1, 5)]
    + ['T{:03d}'.format(number) for number in range(1, 6)]
)

# Data cleaning

In [5]:
def load_dataset(filename, valid_sensors=None):
    """Parse a whitespace-separated sensor event log into a DataFrame.

    Every line is expected to have the form
    ``<date> <time> <sensor> <value> [<annotation ...>]`` where the optional
    annotation contains the word ``begin`` or ``end`` to open or close an
    activity. Rows between a ``begin`` and its ``end`` (both inclusive) carry
    that activity's name; all other rows carry the empty string.

    A line is parsed completely before anything is stored, so a line that is
    rejected halfway through can never leave the four columns with different
    lengths.

    Parameters
    ----------
    filename : str or path-like
        Path of the log file. It is read as bytes and each line is decoded
        with the default codec (UTF-8).
    valid_sensors : iterable of str, optional
        Sensor codes to keep. Defaults to the module-level ``sensor_codes``.
        Independently of this list, only codes starting with ``M``, ``D`` or
        ``T`` are accepted.

    Returns
    -------
    pandas.DataFrame
        Columns ``Timestamps`` (datetime), ``Sensors`` (str), ``Values``
        (``'ON'``, ``'OFF'`` or a float, stored as objects) and ``Activities``
        (str), one row per accepted line, in file order.

    Notes
    -----
    * Lines with fewer than three fields, or with an accepted sensor but no
      value field, are printed as ``(line_number, raw_bytes)`` and skipped.
    * Lines whose value is neither ON/OFF nor numeric, or whose timestamp
      cannot be parsed, are skipped silently.
    """
    if valid_sensors is None:
        valid_sensors = sensor_codes
    valid_sensors = frozenset(valid_sensors)

    timestamps = []
    sensors = []
    values = []
    activities = []
    current_activity = ''  # label of the activity currently in progress, if any

    with open(filename, 'rb') as features:
        # Iterate lazily: no need to hold the whole file in memory.
        for i, line in enumerate(features):
            f_info = line.decode().split()

            if len(f_info) < 3:
                # Blank or truncated line: report it, keep going.
                print(i, line)
                continue

            sensor = f_info[2]
            if sensor[0] not in ('M', 'D', 'T') or sensor not in valid_sensors:
                continue

            if len(f_info) < 4:
                # Known sensor but the reading is missing.
                print(i, line)
                continue

            raw_value = f_info[3]
            if 'OFF' in raw_value:
                value = 'OFF'
            elif 'ON' in raw_value:
                value = 'ON'
            else:
                try:
                    value = float(raw_value)
                except ValueError:
                    # Any other non-numeric reading is dropped.
                    continue

            # Date and time are concatenated without a separator; pad the
            # fractional part when the log omitted it so one format fits all.
            stamp = f_info[0] + f_info[1]
            if '.' not in stamp:
                stamp += '.000000'
            try:
                timestamp = datetime.strptime(stamp, "%Y-%m-%d%H:%M:%S.%f")
            except ValueError:
                continue

            annotation = ' '.join(f_info[4:])  # '' when the line has no annotation
            if 'begin' in annotation:
                # strip() also copes with an annotation that is just "begin".
                current_activity = re.sub('begin', '', annotation).strip()
                activity = current_activity
            elif 'end' in annotation:
                # The closing row still belongs to the activity it closes.
                activity = current_activity
                current_activity = ''
            else:
                # No annotation, or one without a begin/end marker: the row
                # simply inherits whatever activity is in progress.
                activity = current_activity

            # Commit the row only now that every field has been validated.
            timestamps.append(timestamp)
            sensors.append(sensor)
            values.append(value)
            activities.append(activity)

    return pd.DataFrame(
        {
            'Timestamps': pd.to_datetime(timestamps),
            'Sensors': sensors,
            # Explicit object dtype keeps the 'ON'/'OFF' strings and the
            # float readings side by side without coercion.
            'Values': np.array(values, dtype=object),
            'Activities': activities,
        },
        columns=['Timestamps', 'Sensors', 'Values', 'Activities'],
    )
    

In [6]:
# Parse the raw event log into a DataFrame. The path is relative, so the
# notebook must be run from the directory that contains ./datasets.
aruba_dataset = load_dataset("./datasets/aruba/data")

In [26]:
# add_transition_labels edits the frame it is given, so hand it a copy and
# keep aruba_dataset as the untouched parse result.
copy_aruba = aruba_dataset.copy()

In [27]:
def add_transition_labels(aruba_set):
    """Fill unlabelled gaps between activities with a transition label.

    Every run of rows whose ``Activities`` value is ``''`` and that is
    followed by a labelled row is relabelled
    ``'Transition_<previous>_<next>'``, where ``<previous>`` is the last
    label seen before the run (``''`` if the run opens the frame) and
    ``<next>`` is the first label after it. A run at the very end of the
    frame has no following activity and is left as ``''``.

    The work is done positionally on a plain list, so the result does not
    depend on the frame's index, and each row is visited a bounded number of
    times.

    Parameters
    ----------
    aruba_set : pandas.DataFrame
        Frame with an ``Activities`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    labels = list(aruba_set['Activities'])
    total = len(labels)
    previous_activity = ''

    position = 0
    while position < total:
        if labels[position] != '':
            previous_activity = labels[position]
            position += 1
            continue

        # Find the end of this unlabelled run.
        run_end = position
        while run_end < total and labels[run_end] == '':
            run_end += 1

        if run_end < total:
            # A labelled row follows: the whole run shares one transition label.
            transition = 'Transition_' + previous_activity + '_' + labels[run_end]
            labels[position:run_end] = [transition] * (run_end - position)

        position = run_end

    # Single positional write-back; safe for any index.
    aruba_set['Activities'] = labels
    return aruba_set

In [28]:
# Label the unlabelled gaps between activities. The function returns the very
# object it received, so complete_aruba and copy_aruba are the same DataFrame.
complete_aruba = add_transition_labels(copy_aruba)

In [30]:
# Preview the first rows to check the activity labels.
complete_aruba.head()

Unnamed: 0,Timestamps,Sensors,Values,Activities
0,2010-11-04 00:03:50.209589,M003,ON,Sleeping
1,2010-11-04 00:03:57.399391,M003,OFF,Sleeping
2,2010-11-04 00:15:08.984841,T002,21.5,Sleeping
3,2010-11-04 00:30:19.185547,T003,21.0,Sleeping
4,2010-11-04 00:30:19.385336,T004,21.0,Sleeping


# Pre-processing

In [34]:
def process_time_data(dataset):
    weekday = []
    seconds = []
    daytime = []
    
    for instance in dataset["Timestamps"]:
        weekday.append(instance.day_of_week)
        seconds.append(instance.hour * 3600 + instance.minute * 60 + instance.second)
        if instance.hour >= 0 and instance.hour <= 6:
            daytime.append('Morning_Night')
        elif instance.hour >= 6 and instance.hour <= 12:
            daytime.append('Morning_Day')
        elif instance.hour >= 12 and instance.hour <= 18:
            daytime.append('Afternoon')
        else: 
            daytime.append('Evening')
        
    dataset["Weekday"] = weekday
    dataset["Seconds"] = seconds
    dataset["Daytime"] = daytime
    return dataset


In [32]:
# process_time_data adds columns to the frame it is given; pass a copy so
# complete_aruba keeps only the original four columns.
copy_complete_aruba = complete_aruba.copy()

In [35]:
# Add the Weekday, Seconds and Daytime columns. The function returns the
# object it received, so timed_aruba and copy_complete_aruba are the same frame.
timed_aruba = process_time_data(copy_complete_aruba)

In [36]:
# Preview the first 50 rows, including the derived time columns.
timed_aruba.head(50)


Unnamed: 0,Timestamps,Sensors,Values,Activities,Weekday,Seconds,Daytime
0,2010-11-04 00:03:50.209589,M003,ON,Sleeping,3,230,Morning_Night
1,2010-11-04 00:03:57.399391,M003,OFF,Sleeping,3,237,Morning_Night
2,2010-11-04 00:15:08.984841,T002,21.5,Sleeping,3,908,Morning_Night
3,2010-11-04 00:30:19.185547,T003,21.0,Sleeping,3,1819,Morning_Night
4,2010-11-04 00:30:19.385336,T004,21.0,Sleeping,3,1819,Morning_Night


In [None]:
# Summary statistics of the labelled events. complete_aruba does not carry the
# Weekday/Seconds/Daytime columns - those were added to a separate copy.
complete_aruba.describe()

In [None]:
# Categorical plot with one position per activity label on x and the event
# timestamps on y.
sns.catplot(x = 'Activities', y = 'Timestamps', data = complete_aruba)