In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import datetime
import os
import re
from collections import Counter
from datetime import datetime

import numpy as np
from keras.preprocessing import sequence

In [3]:
# Whitelist of sensor identifiers kept by load_dataset: M001-M030, D001-D004
# and T001-T005, in that order. Rows naming any other sensor are skipped.
sensor_codes = (
    ['M%03d' % number for number in range(1, 31)]
    + ['D%03d' % number for number in range(1, 5)]
    + ['T%03d' % number for number in range(1, 6)]
)

In [4]:
def load_dataset(filename):
    """Parse a whitespace-separated sensor log into parallel lists.

    Each row is expected to look like
    ``<date> <time> <sensor> <value> [<activity words...> begin|end]``.
    Only rows whose sensor id is listed in the module-level ``sensor_codes``
    are kept. A row is appended to the output lists only after *all* of its
    fields parsed successfully, so the four per-event lists always have the
    same length.

    Parameters
    ----------
    filename : str
        Path of the log file; it is read in binary mode and each line is
        decoded with ``bytes.decode()``.

    Returns
    -------
    tuple
        ``(timestamps, sensors, values, activities, temperature)`` where
        ``timestamps`` holds ``datetime`` objects, ``sensors`` the sensor ids,
        ``values`` either ``'ON'``, ``'OFF'`` or a float reading,
        ``activities`` the activity label active at each event (``''`` when
        none is active), and ``temperature`` the float readings from
        ``values`` in order of appearance.
    """
    # dataset fields (kept index-aligned: one entry per accepted row)
    timestamps = []
    sensors = []
    values = []
    activities = []

    current_activity = ''  # label of the activity currently in progress

    with open(filename, 'rb') as features:
        for i, line in enumerate(features):  # each line
            f_info = line.decode().split()  # find fields

            if len(f_info) < 3:
                # Too short to even name a sensor: report it and move on.
                print(i, line)
                continue

            sensor = f_info[2]
            # choose only M D T sensors, avoiding unexpected errors
            if sensor[0] not in ('M', 'D', 'T') or sensor not in sensor_codes:
                continue

            if len(f_info) < 4:
                # Sensor without a value: report it, but append nothing so the
                # output lists stay aligned.
                print(i, line)
                continue

            # NOTE(review): values such as OPEN/CLOSE contain neither 'ON' nor
            # 'OFF' and are not numeric, so those rows are dropped here --
            # confirm this is intended.
            raw_value = f_info[3]
            if 'OFF' in raw_value:
                value = 'OFF'
            elif 'ON' in raw_value:
                value = 'ON'
            else:
                try:
                    value = float(raw_value)
                except ValueError:
                    continue

            # Timestamps without a fractional part get one so a single format
            # string covers both variants.
            date_part, time_part = f_info[0], f_info[1]
            if '.' not in date_part + time_part:
                time_part = time_part + '.000000'
            try:
                stamp = datetime.strptime(date_part + time_part,
                                          "%Y-%m-%d%H:%M:%S.%f")
            except ValueError:
                continue

            if len(f_info) == 4:  # no annotation: inherit the running activity
                label = current_activity
            else:  # annotation present
                des = ' '.join(f_info[4:])
                if 'begin' in des:
                    current_activity = re.sub('begin', '', des)
                    # drop the single separator left behind by removing 'begin'
                    if current_activity.endswith(' '):
                        current_activity = current_activity[:-1]
                # The event itself belongs to the activity that begins/ends on
                # it; annotations with neither keyword inherit the running one.
                label = current_activity
                if 'end' in des:
                    current_activity = ''

            timestamps.append(stamp)
            sensors.append(sensor)
            values.append(value)
            activities.append(label)

    # Numeric readings only ('ON'/'OFF' are the only non-float values stored).
    temperature = [reading for reading in values if isinstance(reading, float)]

    return timestamps, sensors, values, activities, temperature

In [5]:
def AssignKeysToValues(timestamps, sensors, values, activities, temperature):
    """Encode sensor events as integer codes and split them into activity runs.

    Every distinct ``sensor + state`` combination receives its own integer
    code; consecutive events that share the same activity label are then
    grouped into one sequence.

    Parameters
    ----------
    timestamps : list
        Unused; kept so the signature matches the tuple produced by
        ``load_dataset``.
    sensors : list of str
        Sensor id of each event.
    values : list
        State of each event (``'ON'``/``'OFF'``/... or a numeric reading).
        The list is read only; it is not modified.
    activities : list of str
        Activity label of each event.
    temperature : list of float
        Numeric readings; their min/max define the half-degree grid used to
        build codes for sensors whose id contains ``"T"``. When empty, no
        temperature codes are generated.

    Returns
    -------
    X : list of list of int
        One sequence of observation codes per run of identical activity.
    Y : list of int
        Activity id of each run; ``len(Y) == len(X)``.
    dictActivities : dict
        Mapping from activity label to integer id (ids follow sorted label
        order).

    Notes
    -----
    An event whose ``sensor + state`` key is unknown (or whose temperature
    value cannot be parsed) reuses the previous event's code, or ``0`` when it
    is the first event.
    """
    # Activity label -> id, deterministic via sorted order.
    dictActivities = {}
    for idx, activity in enumerate(sorted(set(activities))):
        dictActivities[activity] = idx

    # Temperature grid bounds are loop-invariant, so compute them once.
    if temperature:
        t_min = min(temperature)
        t_steps = int((max(temperature) - t_min) * 2) + 1
    else:
        t_min = None
        t_steps = 0

    # "sensor id + state" -> observation code; codes are handed out
    # contiguously so that no two keys share a code.
    dictObs = {}
    count = 0
    for key in sorted(set(sensors)):
        if "M" in key or "AD" in key:
            dictObs[key + "OFF"] = count
            count += 1
            dictObs[key + "ON"] = count
            count += 1
        if "D" in key:
            dictObs[key + "CLOSE"] = count
            count += 1
            dictObs[key + "OPEN"] = count
            count += 1
        if "T" in key:
            for step in range(t_steps):
                dictObs[key + str(float(step / 2.0) + t_min)] = count + step
            # Reserve the range just used so the next sensor starts after it.
            count += t_steps

    # Per-event observation codes (XX) and activity ids (YY).
    XX = []
    YY = []
    for kk, s in enumerate(sensors):
        try:
            state = str(values[kk])
            if "T" in s:
                # Temperature keys are built from values rounded to 0.1.
                state = str(round(float(state), 1))
            code = dictObs[s + state]
        except (KeyError, ValueError, IndexError):
            # Unknown observation: fall back to the previous code (or 0).
            code = XX[-1] if XX else 0
        XX.append(code)
        YY.append(dictActivities[activities[kk]])

    # Group consecutive events with the same activity id into one sequence.
    X = []
    Y = []
    for code, label in zip(XX, YY):
        if Y and label == Y[-1]:
            X[-1].append(code)
        else:
            Y.append(label)
            X.append([code])
    return X, Y, dictActivities

In [6]:
def add_transition_labels(aruba_set):
    """Replace blank activity labels with ``Transition_<prev>_<next>`` labels.

    ``aruba_set[3]`` is the per-event activity list. Every run of ``''``
    entries that is followed by a labelled entry is overwritten, in place,
    with ``'Transition_' + previous_label + '_' + next_label`` where
    ``previous_label`` is the last non-blank label seen before the run
    (``''`` if there was none). A trailing run of blanks with no label after
    it is left unchanged.

    Returns the same ``aruba_set`` object that was passed in.
    """
    labels = aruba_set[3]
    last_label = ''
    pending = []  # positions of blank entries waiting for the next label

    for position, label in enumerate(labels):
        if label == '':
            pending.append(position)
            continue

        if pending:
            transition = 'Transition_' + last_label + '_' + label
            for blank_position in pending:
                labels[blank_position] = transition
            pending = []
        last_label = label

    return aruba_set

In [7]:
# Parse the raw log into the 5-tuple
# (timestamps, sensors, values, activities, temperature).
aruba_dataset = load_dataset("./datasets/aruba/data")

In [8]:
# NOTE(review): this displays the whole tuple of lists; consider showing only a
# short slice (e.g. the first few timestamps) to keep the output small.
aruba_dataset

([datetime.datetime(2010, 11, 4, 0, 3, 50, 209589),
  datetime.datetime(2010, 11, 4, 0, 3, 57, 399391),
  datetime.datetime(2010, 11, 4, 0, 15, 8, 984841),
  datetime.datetime(2010, 11, 4, 0, 30, 19, 185547),
  datetime.datetime(2010, 11, 4, 0, 30, 19, 385336),
  datetime.datetime(2010, 11, 4, 0, 35, 22, 245870),
  datetime.datetime(2010, 11, 4, 0, 40, 25, 428962),
  datetime.datetime(2010, 11, 4, 0, 45, 28, 658171),
  datetime.datetime(2010, 11, 4, 1, 5, 42, 269469),
  datetime.datetime(2010, 11, 4, 1, 15, 48, 936777),
  datetime.datetime(2010, 11, 4, 1, 30, 59, 100184),
  datetime.datetime(2010, 11, 4, 1, 46, 9, 301846),
  datetime.datetime(2010, 11, 4, 1, 56, 16, 59687),
  datetime.datetime(2010, 11, 4, 2, 21, 32, 138203),
  datetime.datetime(2010, 11, 4, 2, 21, 32, 228625),
  datetime.datetime(2010, 11, 4, 2, 32, 33, 351906),
  datetime.datetime(2010, 11, 4, 2, 32, 38, 895958),
  datetime.datetime(2010, 11, 4, 2, 41, 45, 231875),
  datetime.datetime(2010, 11, 4, 2, 56, 55, 529628),

In [9]:
# Fill blank activity labels with Transition_<prev>_<next> labels.
# add_transition_labels edits aruba_dataset[3] in place and returns the same
# tuple, so the re-assignment does not create a separate copy.
aruba_dataset = add_transition_labels(aruba_dataset)

In [10]:
# Inspect the per-event activity labels after transition filling.
# NOTE(review): this displays the full list; a slice would keep the output short.
aruba_dataset[3]

['Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Sleeping',
 'Transition_Sleeping_Bed_to_Toilet',
 'Transition_Sleeping_Bed_to_Toilet',
 'Transition_Sleeping_Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Bed_to_Toilet',
 'Transition_Bed_to_Toilet_Sleepi

In [11]:
# Sanity check: the per-event lists should all have the same length.
# NOTE(review): the last item repeats index 0; index 4 (temperature) was
# probably intended -- confirm.
len(aruba_dataset[0]), len(aruba_dataset[1]), len(aruba_dataset[2]), len(aruba_dataset[3]), len(aruba_dataset[0])

(1709857, 1709857, 1709857, 1709857, 1709857)

In [12]:
# Encode events as integer observation codes and group them into one sequence
# per run of identical activity; dict_activities maps activity label -> id.
X, Y, dict_activities = AssignKeysToValues(aruba_dataset[0], aruba_dataset[1], aruba_dataset[2], aruba_dataset[3], aruba_dataset[4])

In [14]:
# Show the activity label -> integer id mapping.
dict_activities

{'Bed_to_Toilet': 0,
 'Eating': 1,
 'Enter_Home': 2,
 'Housekeeping': 3,
 'Leave_Home': 4,
 'Meal_Preparation': 5,
 'Relax': 6,
 'Respirate': 7,
 'Sleeping': 8,
 'Transition_Bed_to_Toilet_Bed_to_Toilet': 9,
 'Transition_Bed_to_Toilet_Meal_Preparation': 10,
 'Transition_Bed_to_Toilet_Sleeping': 11,
 'Transition_Eating_Eating': 12,
 'Transition_Eating_Enter_Home': 13,
 'Transition_Eating_Meal_Preparation': 14,
 'Transition_Eating_Relax': 15,
 'Transition_Eating_Sleeping': 16,
 'Transition_Eating_Wash_Dishes': 17,
 'Transition_Eating_Work': 18,
 'Transition_Enter_Home_Relax': 19,
 'Transition_Leave_Home_Meal_Preparation': 20,
 'Transition_Meal_Preparation_Eating': 21,
 'Transition_Meal_Preparation_Leave_Home': 22,
 'Transition_Meal_Preparation_Meal_Preparation': 23,
 'Transition_Meal_Preparation_Relax': 24,
 'Transition_Meal_Preparation_Respirate': 25,
 'Transition_Meal_Preparation_Work': 26,
 'Transition_Relax_Eating': 27,
 'Transition_Relax_Enter_Home': 28,
 'Transition_Relax_Housekeepi

In [15]:
# Bring every sequence in X to a common length of 2000 as int32.
# NOTE(review): the variable name says 7000 but maxlen is 2000 -- confirm which
# one is intended.
X_padded_7000 = sequence.pad_sequences(X, maxlen=2000, dtype='int32')

In [16]:
# Persist the padded sequences, the per-sequence activity ids and the label map.
# NOTE(review): dict_activities is a dict, so np.save presumably stores it as a
# pickled object array; reading it back would then need
# np.load(..., allow_pickle=True).item() -- confirm against the numpy docs.
np.save('X_LSTM.npy', X_padded_7000)
np.save('y_LSTM.npy', Y)
np.save('Labels_LSTM.npy', dict_activities)

NameError: name 'dictActivities' is not defined

In [117]:
# Reload the saved padded sequences to check the file round-trips.
testulescu = np.load('X_LSTM.npy', allow_pickle=True)
