In [1]:
import pandas as pd
import os
import io
import datetime

In [2]:
start_time = '2014-04-01'
# 1. Convert the_day string to a datetime object
date_obj = datetime.datetime.strptime(start_time, '%Y-%m-%d')
end_time = date_obj + datetime.timedelta(days=6)
print(end_time)
#end_time = '2014-04-07'
one_week_data = pd.read_csv('one_week_data.csv')
one_week_data['Timestamp'] = pd.to_datetime(one_week_data['Timestamp'])
# one_week_data.head(5)

2014-04-07 00:00:00


In [3]:
the_day = '2014-04-05'

# 1. Convert the_day string to a datetime object
date_obj = datetime.datetime.strptime(the_day, '%Y-%m-%d')

# 2. Calculate 9 PM of the given date
nine_pm_current_day = date_obj.replace(hour=21, minute=0, second=0, microsecond=0)

# 3. Calculate 9 PM of one day earlier
one_day_earlier = date_obj - datetime.timedelta(days=1)
nine_pm_one_day_earlier = one_day_earlier.replace(hour=21, minute=0, second=0, microsecond=0)

# 4. Get the timestamps (Unix timestamps)
timestamp_nine_pm_one_day_earlier = nine_pm_one_day_earlier.timestamp()
timestamp_nine_pm_current_day = nine_pm_current_day.timestamp()

print(f"Date used: {the_day}")
print(f"9 PM one day earlier ({nine_pm_one_day_earlier}): {timestamp_nine_pm_one_day_earlier}")
print(f"9 PM of the date ({nine_pm_current_day}): {timestamp_nine_pm_current_day}")

Date used: 2014-04-05
9 PM one day earlier (2014-04-04 21:00:00): 1396616400.0
9 PM of the date (2014-04-05 21:00:00): 1396702800.0


In [4]:
one_day_data = one_week_data[(one_week_data['Timestamp'] >= nine_pm_one_day_earlier) 
    & (one_week_data['Timestamp'] <= nine_pm_current_day)]
# one_day_data.head(20)

In [5]:
# 1. Filter for Motion Sensors and Sort by Timestamp
motion_df = one_day_data[one_day_data['SensorType'] == 'Control4-Motion'].copy()
motion_df = motion_df.sort_values(by='Timestamp').reset_index(drop=True)

# 2. Convert 'Value' to boolean 'IsMotion'
motion_df['IsMotion'] = motion_df['Value'] == 'ON'

# 3. Identify the "active room" at each timestamp.
#    If multiple rooms are ON simultaneously, this logic prioritizes the latest ON.
#    A more robust approach might track all active rooms.
#    For simplicity, let's assume the resident is primarily in one room at a time,
#    and the latest ON event signifies their current location.

# Create a temporary column that only has the room if motion is detected
motion_df['ActiveRoomCandidate'] = motion_df.apply(
    lambda row: row['Attribute1'] if row['IsMotion'] else None, axis=1
)

# Fill forward the ActiveRoomCandidate to represent the current "known" room
# This will propagate the last known active room until a new one is detected.
motion_df['CurrentActiveRoom'] = motion_df['ActiveRoomCandidate'].ffill()

# Drop rows where there's no known active room (e.g., before any motion)
motion_df = motion_df.dropna(subset=['CurrentActiveRoom'])


In [8]:
# A more precise way to mark the start of a new "stay block":
# A new block starts if:
#   a) The room changes
#   b) The sensor turns ON in the current room, and the previous state in that room was OFF (or non-existent)
#   c) The sensor turns OFF in the current room, and it was the last active room AND all sensors in that room are now off (this needs more context than a simple shift)

# Let's refine the "RoomChange" logic for stays.
# A new "stay" block begins when:
# 1. The 'CurrentActiveRoom' changes.
# 2. Motion becomes 'ON' in the current `CurrentActiveRoom` (after potentially being 'OFF' or not recorded).
# We'll use a `cumsum` on these change points.

motion_df['stay_group_start'] = (
    (motion_df['CurrentActiveRoom'] != motion_df['CurrentActiveRoom'].shift(1))
).astype(int).cumsum()

motion_df.head(10)

Unnamed: 0.1,Unnamed: 0,SensorID,Attribute1,Attribute2,Value,SensorType,Timestamp,IsMotion,ActiveRoomCandidate,CurrentActiveRoom,stay_group_start
0,6434541,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:44.941567,True,Kitchen,Kitchen,1
1,6434543,M007,Kitchen,Kitchen,OFF,Control4-Motion,2014-04-04 21:00:46.976946,False,,Kitchen,1
2,6434544,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:53.932224,True,Kitchen,Kitchen,1
3,6434546,M007,Kitchen,Kitchen,OFF,Control4-Motion,2014-04-04 21:00:55.039010,False,,Kitchen,1
4,6434547,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:56.700078,True,Kitchen,Kitchen,1
5,6434549,M005,DiningRoom,DiningRoom,ON,Control4-Motion,2014-04-04 21:00:58.212312,True,DiningRoom,DiningRoom,2
6,6434551,M007,Kitchen,Kitchen,OFF,Control4-Motion,2014-04-04 21:00:59.562827,False,,DiningRoom,2
7,6434552,M005,DiningRoom,DiningRoom,OFF,Control4-Motion,2014-04-04 21:01:00.626681,False,,DiningRoom,2
8,6434553,M004,LivingRoom,Chair,ON,Control4-Motion,2014-04-04 21:04:51.069809,True,LivingRoom,LivingRoom,3
9,6434554,M004,LivingRoom,Chair,OFF,Control4-Motion,2014-04-04 21:04:53.070743,False,,LivingRoom,3


In [9]:
# To properly handle the end of a stay, we need to know the *next* event that changes the state.
# For each 'stay_group_start', the start time is the first timestamp in that group.
# The end time is the timestamp of the first event of the *next* group, or the end of the data.

stay_periods = []

# Group by the identified stay_group_start blocks
grouped_stays = motion_df.groupby('stay_group_start')
grouped_stays.head(10)

Unnamed: 0.1,Unnamed: 0,SensorID,Attribute1,Attribute2,Value,SensorType,Timestamp,IsMotion,ActiveRoomCandidate,CurrentActiveRoom,stay_group_start
0,6434541,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:44.941567,True,Kitchen,Kitchen,1
1,6434543,M007,Kitchen,Kitchen,OFF,Control4-Motion,2014-04-04 21:00:46.976946,False,,Kitchen,1
2,6434544,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:53.932224,True,Kitchen,Kitchen,1
3,6434546,M007,Kitchen,Kitchen,OFF,Control4-Motion,2014-04-04 21:00:55.039010,False,,Kitchen,1
4,6434547,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:56.700078,True,Kitchen,Kitchen,1
...,...,...,...,...,...,...,...,...,...,...,...
2817,6440120,M005,DiningRoom,DiningRoom,OFF,Control4-Motion,2014-04-05 18:41:27.041536,False,,DiningRoom,396
2818,6440122,M005,DiningRoom,DiningRoom,ON,Control4-Motion,2014-04-05 18:41:30.754326,True,DiningRoom,DiningRoom,396
2819,6440123,M005,DiningRoom,DiningRoom,OFF,Control4-Motion,2014-04-05 18:41:31.852745,False,,DiningRoom,396
2820,6440124,M005,DiningRoom,DiningRoom,ON,Control4-Motion,2014-04-05 18:41:39.380120,True,DiningRoom,DiningRoom,396


In [14]:
# To properly handle the end of a stay, we need to know the *next* event that changes the state.
# For each 'stay_group_start', the start time is the first timestamp in that group.
# The end time is the timestamp of the first event of the *next* group, or the end of the data.

room_stay_periods = []

# Group by the identified stay_group_start blocks
grouped_stays = motion_df.groupby('stay_group_start')

for group_id, group in grouped_stays:
    room = group['CurrentActiveRoom'].iloc[0] # The room for this stay block
    start_time = group['Timestamp'].iloc[0] # Start of the block

    # Determine the end time for this block
    # It's the timestamp of the next group's first event, or the max timestamp if it's the last group
    if group_id < motion_df['stay_group_start'].max():
        next_group_start_time = motion_df[motion_df['stay_group_start'] == group_id + 1]['Timestamp'].iloc[0]
        end_time = next_group_start_time
    else:
        end_time = motion_df['Timestamp'].max() # End of the entire dataset

    # Only include stays where motion was detected at some point in the group,
    # or where the room was inferred as active.
    # The `CurrentActiveRoom` logic already implies presence.
    
    # We should also ensure the stay only starts if the sensor indicates 'ON' in that room
    # or if the `CurrentActiveRoom` was activated by an 'ON' in the previous step.
    # Given how `CurrentActiveRoom` is derived from `ffill`, if it exists, it implies a recent ON.
    
    # Let's refine the group and duration calculation.
    # The start of a stay is the first 'ON' for a room.
    # The end of a stay is the first 'ON' in *another* room, or the last 'OFF' in the current room.
    
    
    room_stay_periods.append({
        'room': room,
        'start_time': start_time,
        'end_time': end_time
    })



In [15]:
print(room_stay_periods)

[{'room': 'Kitchen', 'start_time': Timestamp('2014-04-04 21:00:44.941567'), 'end_time': Timestamp('2014-04-04 21:00:58.212312')}, {'room': 'DiningRoom', 'start_time': Timestamp('2014-04-04 21:00:58.212312'), 'end_time': Timestamp('2014-04-04 21:04:51.069809')}, {'room': 'LivingRoom', 'start_time': Timestamp('2014-04-04 21:04:51.069809'), 'end_time': Timestamp('2014-04-04 21:09:55.781471')}, {'room': 'DiningRoom', 'start_time': Timestamp('2014-04-04 21:09:55.781471'), 'end_time': Timestamp('2014-04-04 21:10:41.732679')}, {'room': 'WorkArea', 'start_time': Timestamp('2014-04-04 21:10:41.732679'), 'end_time': Timestamp('2014-04-04 21:11:20.800523')}, {'room': 'DiningRoom', 'start_time': Timestamp('2014-04-04 21:11:20.800523'), 'end_time': Timestamp('2014-04-04 21:12:18.611104')}, {'room': 'Kitchen', 'start_time': Timestamp('2014-04-04 21:12:18.611104'), 'end_time': Timestamp('2014-04-04 21:12:19.333686')}, {'room': 'DiningRoom', 'start_time': Timestamp('2014-04-04 21:12:19.333686'), 'end_