In [1]:
import pandas as pd
import os
import io
import datetime

In [2]:
# Nominal date range of the extract (these two names are not referenced by the
# other cells visible in this notebook).
start_time, end_time = '2014-04-01', '2014-04-07'

# Read the CSV and replace the 'Timestamp' column with parsed datetimes so the
# later range filters can compare against datetime objects.
one_week_data = pd.read_csv('one_week_data.csv').assign(
    Timestamp=lambda frame: pd.to_datetime(frame['Timestamp'])
)
one_week_data.head()

Unnamed: 0.1,Unnamed: 0,SensorID,Attribute1,Attribute2,Value,SensorType,Timestamp
0,6431809,BATP105,Ignore,Ignore,36,Control4-BatteryPercent,2014-04-01 02:43:32.162533
1,6431810,BATP103,Ignore,Ignore,48,Control4-BatteryPercent,2014-04-01 03:21:14.529900
2,6431811,T105,Ignore,KitchenTemp,25,Control4-Temperature,2014-04-01 04:12:48.248098
3,6431812,BATP022,Ignore,Ignore,42,Control4-BatteryPercent,2014-04-01 05:09:01.748707
4,6431813,BATP104,Ignore,Ignore,45,Control4-BatteryPercent,2014-04-01 05:48:18.736504


In [3]:
# Display the column labels of the loaded frame.
one_week_data.columns

Index(['Unnamed: 0', 'SensorID', 'Attribute1', 'Attribute2', 'Value',
       'SensorType', 'Timestamp'],
      dtype='object')

In [4]:
the_day = '2014-04-05'

# Parse the date string (midnight of that day) and step back one calendar day.
date_obj = datetime.datetime.strptime(the_day, '%Y-%m-%d')
one_day_earlier = date_obj - datetime.timedelta(days=1)

# Window boundaries: 21:00:00 on the_day and 21:00:00 on the day before.
nine_pm_current_day = datetime.datetime.combine(date_obj.date(), datetime.time(21))
nine_pm_one_day_earlier = datetime.datetime.combine(one_day_earlier.date(), datetime.time(21))

# Unix timestamps of both boundaries. The datetimes are naive, so .timestamp()
# interprets them in the machine's local time zone.
timestamp_nine_pm_one_day_earlier, timestamp_nine_pm_current_day = (
    boundary.timestamp() for boundary in (nine_pm_one_day_earlier, nine_pm_current_day)
)

print(f"Date used: {the_day}")
print(f"9 PM one day earlier ({nine_pm_one_day_earlier}): {timestamp_nine_pm_one_day_earlier}")
print(f"9 PM of the date ({nine_pm_current_day}): {timestamp_nine_pm_current_day}")



Date used: 2014-04-05
9 PM one day earlier (2014-04-04 21:00:00): 1396616400.0
9 PM of the date (2014-04-05 21:00:00): 1396702800.0


In [5]:
# Keep the readings whose Timestamp lies between 9 PM of the previous day and
# 9 PM of the_day; Series.between includes both boundaries by default.
one_day_data = one_week_data[
    one_week_data['Timestamp'].between(nine_pm_one_day_earlier, nine_pm_current_day)
]
one_day_data.head(10)

Unnamed: 0.1,Unnamed: 0,SensorID,Attribute1,Attribute2,Value,SensorType,Timestamp
2729,6434538,LS008,Ignore,Ignore,24,Control4-LightSensor,2014-04-04 21:00:36.206586
2730,6434539,BATP008,Ignore,Ignore,63,Control4-BatteryPercent,2014-04-04 21:00:36.258214
2731,6434540,LS007,Ignore,Ignore,8,Control4-LightSensor,2014-04-04 21:00:44.880928
2732,6434541,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:44.941567
2733,6434542,LS007,Ignore,Ignore,9,Control4-LightSensor,2014-04-04 21:00:46.945409
2734,6434543,M007,Kitchen,Kitchen,OFF,Control4-Motion,2014-04-04 21:00:46.976946
2735,6434544,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:53.932224
2736,6434545,LS007,Ignore,Ignore,8,Control4-LightSensor,2014-04-04 21:00:55.007836
2737,6434546,M007,Kitchen,Kitchen,OFF,Control4-Motion,2014-04-04 21:00:55.039010
2738,6434547,M007,Kitchen,Kitchen,ON,Control4-Motion,2014-04-04 21:00:56.700078


In [13]:
# Match OPEN and CLOSE events per door and report the ones left unpaired.
#
# NOTE: `door_data` must already exist when this cell runs. In this notebook it
# is created by a cell that appears further down, so that cell has to be
# executed first.

# 1. 'Timestamp' is expected to hold datetime values already (it is converted
#    right after the CSV is loaded), so no conversion is done here.

# 2. Sort by door identifiers and Timestamp so the running sum follows time order
door_key = ['SensorID', 'Attribute1', 'Attribute2']
door_data = door_data.sort_values(by=door_key + ['Timestamp']).reset_index(drop=True)

# 3. Assign numerical values: OPEN = 1, CLOSE = -1
#    Any other 'Value' maps to NaN and is turned into 0, so it does not affect the sum
door_data['event_val'] = door_data['Value'].map({'OPEN': 1, 'CLOSE': -1}).fillna(0)

# 4. Running balance of OPEN minus CLOSE events within each door group
door_data['current_state'] = door_data.groupby(door_key)['event_val'].cumsum()

# --- Detection Logic ---

# 5. Rows where the running balance is negative: more CLOSE than OPEN events so far.
#    The balance is not reset after such a row, so later rows of the same door
#    can stay negative and be listed as well; the report is only a warning.
unmatched_closes = door_data[door_data['current_state'] < 0]
if not unmatched_closes.empty:
    print("Warning: 'CLOSE' events found without a preceding 'OPEN' for the same door:")
    print(unmatched_closes[['SensorID', 'Attribute1', 'Attribute2', 'Value', 'Timestamp', 'current_state']].to_markdown(index=False, numalign="left", stralign="left"))

# 6. Doors whose final balance is positive: some OPEN events were never closed.
#    'last' picks the chronologically last row of each group because of the sort above.
last_states_per_door = door_data.groupby(door_key).agg(
    final_state=('current_state', 'last'),
    last_timestamp=('Timestamp', 'last')
)

unmatched_open_doors_summary = last_states_per_door[last_states_per_door['final_state'] > 0]

if not unmatched_open_doors_summary.empty:
    print("\nDoor(s) with unmatched 'OPEN' events (not all OPENs have a corresponding CLOSE):")
    print(unmatched_open_doors_summary[['final_state', 'last_timestamp']].to_markdown(numalign="left", stralign="left"))

    # If a door's final_state is N > 0, its last N 'OPEN' events are treated as unmatched.
    unmatched_open_events_list = []
    for (sensor_id, attr1, attr2), row_data in unmatched_open_doors_summary.iterrows():
        num_unmatched = int(row_data['final_state'])

        # All 'OPEN' events of this door, newest first. The lookup uses door_data,
        # the frame analysed above (the previous code referenced an undefined `df`).
        door_open_events = door_data[
            (door_data['SensorID'] == sensor_id) &
            (door_data['Attribute1'] == attr1) &
            (door_data['Attribute2'] == attr2) &
            (door_data['Value'] == 'OPEN')
        ].sort_values('Timestamp', ascending=False)

        # Keep the most recent 'num_unmatched' OPEN events
        if not door_open_events.empty:
            unmatched_open_events_list.append(door_open_events.head(num_unmatched))

    if unmatched_open_events_list:
        detailed_unmatched_df = pd.concat(unmatched_open_events_list).sort_values('Timestamp')
        print("\nDetailed list of specific unmatched 'OPEN' events:")
        print(detailed_unmatched_df[['SensorID', 'Attribute1', 'Attribute2', 'Value', 'Timestamp']].to_markdown(index=False, numalign="left", stralign="left"))
elif unmatched_closes.empty:
    # Only claim a fully clean result when neither check found anything.
    print("\nAll 'OPEN' events have a corresponding 'CLOSE' event, and no 'CLOSE' events occurred without a preceding 'OPEN'.")
else:
    print("\nNo door ends the period with more 'OPEN' than 'CLOSE' events (unmatched 'CLOSE' events were reported above).")


All 'OPEN' events have a corresponding 'CLOSE' event, and no 'CLOSE' events occurred without a preceding 'OPEN'.


In [15]:
# Show up to 20 rows of the per-door summary (final_state and last_timestamp).
last_states_per_door.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,final_state,last_timestamp
SensorID,Attribute1,Attribute2,Unnamed: 3_level_1,Unnamed: 4_level_1
D002,OutsideDoor,FrontDoor,0,2014-04-05 18:42:21.015943


In [8]:
# Door-only readings for the day; the OPEN/CLOSE matching cell works on this frame.
door_data = one_day_data[one_day_data['SensorType'] == 'Control4-Door']

# Every sensor reading inside a 7-minute window on 2014-04-05, for inspecting
# what else fired in that period. It is kept under its own name so it does not
# overwrite the door-only frame above with non-door rows.
window_data = one_day_data[(one_day_data['Timestamp'] >'2014-04-05 09:03:39.246137') & (one_day_data['Timestamp'] <'2014-04-05 09:10:39.246137')]
window_data.head()

Unnamed: 0.1,Unnamed: 0,SensorID,Attribute1,Attribute2,Value,SensorType,Timestamp
3480,6435289,LS015,Ignore,Ignore,4,Control4-LightSensor,2014-04-05 09:05:35.352389
3481,6435290,LS004,Ignore,Ignore,15,Control4-LightSensor,2014-04-05 09:05:36.583969
3482,6435291,M004,LivingRoom,Chair,ON,Control4-Motion,2014-04-05 09:05:36.635723
3483,6435292,M004,LivingRoom,Chair,OFF,Control4-Motion,2014-04-05 09:05:38.272640
3484,6435293,M005,DiningRoom,DiningRoom,ON,Control4-Motion,2014-04-05 09:06:06.014020


### Detect whether a person leaves home
To detect whether a person leaves the house based on the sensor data, you can use the following Python code. The code looks for a door "open" event (it was originally written assuming a SensorType of 'DoorSensor' and a Value of 'Open'; adjust the filter to match the labels used in your data). For each open event it then checks that there was motion (a Control4-Motion reading with Value 'ON') before the event and that there is no such motion after it within a specified time window.

In [8]:
def detect_person_leaving(df, time_window_after_open_seconds=300,
                          door_sensor_types=('Control4-Door', 'DoorSensor'),
                          open_values=('OPEN', 'Open')):
    """
    Detects door-open events that look like a person leaving the house.

    A door-open event is reported when at least one motion 'ON' reading exists
    before it and no motion 'ON' reading exists within the time window after it.

    Args:
        df (pd.DataFrame): Sensor data with 'Timestamp' (datetime values),
                           'SensorType', 'Value' and 'SensorID' columns.
        time_window_after_open_seconds (int): The time window in seconds
                                              to check for no motion after an 'open' event.
        door_sensor_types (str or iterable of str): 'SensorType' labels that
            identify door sensors. The default covers the 'Control4-Door' label
            used elsewhere in this notebook as well as the 'DoorSensor' label
            this function originally assumed.
        open_values (str or iterable of str): 'Value' labels that mean the door
            was opened. The default covers 'OPEN' and the originally assumed 'Open'.

    Returns:
        list: One dictionary per detected event with the keys 'open_timestamp',
              'open_sensor_id' and 'description'. Empty when nothing is detected.
    """
    # Accept a single label as well as a collection of labels.
    if isinstance(door_sensor_types, str):
        door_sensor_types = [door_sensor_types]
    if isinstance(open_values, str):
        open_values = [open_values]

    # Motion 'ON' timestamps do not depend on the door event, so select them once
    # instead of re-filtering the whole frame for every open event.
    motion_times = df.loc[
        (df['SensorType'] == 'Control4-Motion') & (df['Value'] == 'ON'),
        'Timestamp',
    ]
    window = pd.Timedelta(seconds=time_window_after_open_seconds)

    open_events = df[
        df['SensorType'].isin(list(door_sensor_types)) & df['Value'].isin(list(open_values))
    ]

    leaving_events = []
    for _, open_event in open_events.iterrows():
        open_timestamp = open_event['Timestamp']
        open_sensor_id = open_event['SensorID']

        # Any motion strictly before the door opened?
        motion_before_open = (motion_times < open_timestamp).any()

        # Any motion after the door opened, up to and including the end of the window?
        motion_after_open = (
            (motion_times > open_timestamp) & (motion_times <= open_timestamp + window)
        ).any()

        # Motion before and silence afterwards: a person likely left.
        if motion_before_open and not motion_after_open:
            leaving_events.append({
                'open_timestamp': open_timestamp,
                'open_sensor_id': open_sensor_id,
                'description': f"Person likely left the house around {open_timestamp} via {open_sensor_id}"
            })
    return leaving_events


# Put the readings in chronological order and renumber the index from zero.
one_day_data = one_day_data.sort_values(by='Timestamp')
one_day_data = one_day_data.reset_index(drop=True)

# Run the detector over the whole day. The window is the number of seconds
# after a door opening that must stay free of motion (300 seconds = 5 minutes).
detected_leaving_events = detect_person_leaving(one_day_data, time_window_after_open_seconds=300)

if not detected_leaving_events:
    print("No person leaving events detected based on the criteria.")
for event in detected_leaving_events:
    print(event['description'])

No person leaving events detected based on the criteria.


In [9]:
# Chronological order with a fresh 0..n-1 index.
one_day_data = one_day_data.sort_values('Timestamp')
one_day_data = one_day_data.reset_index(drop=True)

def detect_person_leaving(df_input, time_window_after_open_seconds=300,
                          door_sensor_types=('Control4-Door', 'DoorSensor'),
                          open_values=('OPEN', 'Open')):
    """
    Detects door-open events that look like a person leaving the house.

    A door-open event is reported when some motion 'ON' reading precedes it and
    no motion 'ON' reading follows it within the time window.

    Note: this redefines the function of the same name from the earlier cell.

    Args:
        df_input (pd.DataFrame): Sensor data with 'Timestamp' (datetime values),
                                 'SensorType', 'Value' and 'SensorID' columns.
        time_window_after_open_seconds (int): The time window in seconds
                                              to check for no motion after an 'open' event.
        door_sensor_types (str or iterable of str): 'SensorType' labels that
            identify door sensors. Defaults to the 'Control4-Door' label used
            elsewhere in this notebook plus the originally assumed 'DoorSensor'.
        open_values (str or iterable of str): 'Value' labels that mean the door
            was opened. Defaults to 'OPEN' plus the originally assumed 'Open'.

    Returns:
        list: One dictionary per detected event with the keys 'open_timestamp',
              'open_sensor_id' and 'description'. Empty when nothing is detected.
    """
    # A bare string is treated as a single label.
    sensor_labels = [door_sensor_types] if isinstance(door_sensor_types, str) else list(door_sensor_types)
    value_labels = [open_values] if isinstance(open_values, str) else list(open_values)

    is_door_open = df_input['SensorType'].isin(sensor_labels) & df_input['Value'].isin(value_labels)
    is_motion_on = (df_input['SensorType'] == 'Control4-Motion') & (df_input['Value'] == 'ON')

    # Loop-invariant: the motion 'ON' timestamps are selected once up front.
    motion_on_times = df_input.loc[is_motion_on, 'Timestamp']
    quiet_period = pd.Timedelta(seconds=time_window_after_open_seconds)

    leaving_events = []
    for _, open_event in df_input[is_door_open].iterrows():
        open_timestamp = open_event['Timestamp']
        open_sensor_id = open_event['SensorID']

        # Motion strictly before the opening.
        had_motion_before = (motion_on_times < open_timestamp).any()
        # Motion after the opening, up to and including the end of the window.
        had_motion_after = (
            (motion_on_times > open_timestamp)
            & (motion_on_times <= open_timestamp + quiet_period)
        ).any()

        if had_motion_before and not had_motion_after:
            leaving_events.append({
                'open_timestamp': open_timestamp,
                'open_sensor_id': open_sensor_id,
                'description': f"Person likely left the house around {open_timestamp} via {open_sensor_id}"
            })
    return leaving_events

# --- Example Usage ---

# 1. Run the detector over the whole one-day frame in a single call.
#    The 'Timestamp' column was converted to datetime on the full frame right
#    after loading, so no per-slice conversion (and no SettingWithCopyWarning
#    from one) is involved here.
print("--- Processing entire DataFrame ---")
detected_leaving_events = detect_person_leaving(one_day_data, time_window_after_open_seconds=300)

if not detected_leaving_events:
    print("No person leaving events detected based on the criteria for the entire dataset.")
for event in detected_leaving_events:
    print(event['description'])

print("\n--- Demonstrating SettingWithCopyWarning fix (if you process data by segments) ---")


--- Processing entire DataFrame ---
No person leaving events detected based on the criteria for the entire dataset.



In [None]:
# 2. Processing the data in segments (day by day) when 'Timestamp' has NOT been
#    converted to datetime yet. Converting it on a slice of the raw frame is
#    where a SettingWithCopyWarning would typically appear.

# Re-read the CSV without converting 'Timestamp', so the column holds the raw
# strings from the file. (The previous code built this frame from `data`, a
# name that is not defined anywhere in the notebook.)
df_raw_example = pd.read_csv('one_week_data.csv')

# Date part (text before the first space) of every timestamp string
unique_dates = df_raw_example['Timestamp'].dropna().str.split(' ').str[0].unique()

for date_str in unique_dates:
    print(f"\nProcessing data for date: {date_str}")
    # Incorrect way: converting the column on a plain slice may raise SettingWithCopyWarning
    # day_data = df_raw_example[df_raw_example['Timestamp'].str.startswith(date_str)]
    # day_data['Timestamp'] = pd.to_datetime(day_data['Timestamp'])

    # Correct way: take an independent copy of the slice before modifying it.
    # na=False treats rows with a missing timestamp as non-matching.
    # The per-day frame gets its own name so the loop does not overwrite the
    # notebook-level `one_day_data` used by the other cells.
    day_data = df_raw_example[df_raw_example['Timestamp'].str.startswith(date_str, na=False)].copy()
    day_data['Timestamp'] = pd.to_datetime(day_data['Timestamp'])

    # Sort for consistent processing within the day
    day_data = day_data.sort_values(by='Timestamp').reset_index(drop=True)

    # Run the detection on this day's readings (30-second quiet window)
    daily_leaving_events = detect_person_leaving(day_data, time_window_after_open_seconds=30)
    if daily_leaving_events:
        for event in daily_leaving_events:
            print(event['description'])
    else:
        print(f"No person leaving events detected for {date_str}.")