In [1]:
import pandas as pd
import os
import io
import datetime

In [2]:
start_time = '2014-04-01'

# Parse the first day of the week and derive the last day, six days later.
date_obj = datetime.datetime.strptime(start_time, '%Y-%m-%d')
end_time = date_obj + datetime.timedelta(days=6)
print(end_time)

# Load the sensor events and turn the Timestamp column into real datetimes
# so it can be compared against datetime bounds further down.
one_week_data = pd.read_csv('one_week_data.csv').assign(
    Timestamp=lambda events: pd.to_datetime(events['Timestamp'])
)

2014-04-07 00:00:00


In [3]:
the_day = '2014-04-05'

# Midnight at the start of the target day.
date_obj = datetime.datetime.strptime(the_day, '%Y-%m-%d')
one_day_earlier = date_obj - datetime.timedelta(days=1)

# Window bounds: 21:00 on the previous day and 21:00 on the target day.
nine_pm = datetime.time(hour=21)
nine_pm_current_day = datetime.datetime.combine(date_obj.date(), nine_pm)
nine_pm_one_day_earlier = datetime.datetime.combine(one_day_earlier.date(), nine_pm)

# Unix timestamps of both bounds. The datetimes are naive, so .timestamp()
# interprets them in the machine's local time zone.
timestamp_nine_pm_one_day_earlier, timestamp_nine_pm_current_day = (
    bound.timestamp() for bound in (nine_pm_one_day_earlier, nine_pm_current_day)
)

print(f"Date used: {the_day}")
print(f"9 PM one day earlier ({nine_pm_one_day_earlier}): {timestamp_nine_pm_one_day_earlier}")
print(f"9 PM of the date ({nine_pm_current_day}): {timestamp_nine_pm_current_day}")

Date used: 2014-04-05
9 PM one day earlier (2014-04-04 21:00:00): 1396616400.0
9 PM of the date (2014-04-05 21:00:00): 1396702800.0


In [4]:
# Keep only the events whose Timestamp lies inside the 9 PM to 9 PM window;
# both bounds are inclusive.
in_window = one_week_data['Timestamp'].between(nine_pm_one_day_earlier, nine_pm_current_day)
one_day_data = one_week_data[in_window]

In [13]:
# Sensor groups per area. All four are None placeholders in this cell.
bedroom_sensors = living_room_sensors = work_area_sensors = other_sleep_sensors = None

# Hour-of-day bounds (24-hour clock) used by the night-time sleep criterion.
sleep_start_hour, sleep_end_hour = 20, 10  # 8 PM, 10 AM

# Duration thresholds, all expressed in minutes.
min_inactive_duration = 30         # shortest gap treated as an inactive period
min_sleep_duration = 3 * 60        # 3 hours minimum
couch_sleep_min_duration = 1 * 60  # 1 hour minimum for couch sleep


In [7]:
# Flatten every per-area sensor group into a single list. A group that is
# still None (not configured) counts as empty; concatenating the groups with
# `+` raised TypeError in that case, which broke Restart & Run All.
all_sleep_sensors = [
    sensor
    for sensor_group in (
        bedroom_sensors,
        living_room_sensors,
        work_area_sensors,
        other_sleep_sensors,
    )
    for sensor in (sensor_group or [])
]

In [9]:
# Extract location information from Attribute1 and Attribute2
data = one_day_data.copy()


def _location_part(values):
    """Return `values` with NaN and the exact placeholder 'Ignore' blanked out."""
    filled = values.fillna('')
    return filled.mask(filled == 'Ignore', '')


# Join the two attributes with '_' and tidy up the separators. Only values
# that are exactly 'Ignore' are dropped: substring replacement left a stray
# 'Ignore' when both attributes were the placeholder, and it also cut the text
# out of values that merely contained it.
data['Location'] = (
    (_location_part(data['Attribute1']) + '_' + _location_part(data['Attribute2']))
    .str.replace('__', '_', regex=False)
    .str.strip('_')
)

# Filter motion sensors only (case-insensitive match; rows with a missing
# SensorType are excluded)
motion_data = data[data['SensorType'].str.contains('Motion', case=False, na=False)].copy()

# Sort by timestamp
motion_data = motion_data.sort_values('Timestamp').reset_index(drop=True)

### detect_activity_periods

In [15]:
# Work on a private, chronologically ordered copy of the motion events.
df = motion_data.copy()
df['Date'] = df['Timestamp'].dt.date
df = df.sort_values(['Date', 'Timestamp']).reset_index(drop=True)

# Calculate time differences between consecutive events.
# The difference is taken across the whole ordered window rather than per
# calendar date: a per-date diff leaves the first event of each day without a
# predecessor, so any gap that crosses midnight would be dropped.
previous_event = df['Timestamp'].shift()
df['TimeDiff'] = df['Timestamp'] - previous_event

# Identify inactive periods (gaps > min_inactive_duration minutes)
inactive_mask = df['TimeDiff'] > pd.Timedelta(minutes=min_inactive_duration)
inactive_periods = df[inactive_mask].copy()

if inactive_periods.empty:
    print('no inactive periods')

# A gap starts at the event preceding the row that closes it. shift() aligns
# on the index, so no label arithmetic (index - 1) is needed.
inactive_periods['InactiveStart'] = previous_event[inactive_mask]
inactive_periods['InactiveEnd'] = inactive_periods['Timestamp']
# File each period under the calendar date on which it starts. For gaps that
# stay within one day this equals the date of the closing event.
inactive_periods['Date'] = inactive_periods['InactiveStart'].dt.date
inactive_periods['Duration'] = (
    inactive_periods['InactiveEnd'] - inactive_periods['InactiveStart']
).dt.total_seconds() / 60  # minutes

# Select and rename columns
result = inactive_periods[['Date', 'InactiveStart', 'InactiveEnd', 'Duration']].copy()
result['Type'] = 'Inactive'
result.head()

Unnamed: 0,Date,InactiveStart,InactiveEnd,Duration,Type
210,2014-04-04,2014-04-04 22:22:15.840493,2014-04-04 22:52:18.855923,30.050257,Inactive
424,2014-04-05,2014-04-05 00:08:29.471459,2014-04-05 01:29:48.294037,81.31371,Inactive
432,2014-04-05,2014-04-05 01:33:13.697524,2014-04-05 06:37:46.316029,304.543642,Inactive
588,2014-04-05,2014-04-05 09:07:16.375671,2014-04-05 10:05:39.063527,58.378131,Inactive
3140,2014-04-05,2014-04-05 13:17:18.155352,2014-04-05 15:00:58.210138,103.66758,Inactive


### Detect sleep periods
Detect sleep periods across multiple locations using optimized pandas operations

In [21]:
# Get activity periods
activity_periods = result.copy()

if activity_periods.empty:
    print('no inactive periods')

motion_df = motion_data.copy()
motion_df['Date'] = motion_df['Timestamp'].dt.date
motion_df = motion_df.sort_values(['Date', 'Timestamp']).reset_index(drop=True)

# Per-date grouping of the motion events. The lookup below does not use it;
# it is still defined in case other cells refer to it.
motion_by_date = motion_df.groupby('Date')

# For each inactive period, take Attribute1 of the most recent motion event at
# or before InactiveStart, provided that event is at most 5 minutes old.
# The events are in chronological order, so a binary search finds that event
# directly. Searching the whole frame instead of one date group also covers
# periods that start just after midnight and avoids a KeyError for a date
# that has no motion events.
event_times = motion_df['Timestamp']
event_locations = motion_df['Attribute1']
lookback = pd.Timedelta(minutes=5)

sleep_locations_list = []

for inactive_start in activity_periods['InactiveStart']:
    # Position of the last event with Timestamp <= inactive_start (-1 if none).
    last_event_pos = event_times.searchsorted(inactive_start, side='right') - 1
    if last_event_pos >= 0 and event_times.iloc[last_event_pos] >= inactive_start - lookback:
        sleep_locations_list.append(event_locations.iloc[last_event_pos])
    else:
        sleep_locations_list.append('unknown')

# # Apply sleep criteria using vectorized operations
# sleep_periods = self._apply_sleep_criteria_vectorized(activity_periods)

### Apply sleep criteria using vectorized operations

In [34]:
# Add time-based features
activity_periods = activity_periods.copy()
activity_periods['StartHour'] = activity_periods['InactiveStart'].dt.hour
activity_periods['SleepDurationHours'] = activity_periods['Duration'] / 60
activity_periods['SleepLocation'] = sleep_locations_list

# Minimum inactive duration (minutes) required per location. The bedroom and
# living-room thresholds reuse the configuration constants instead of
# repeating their values.
duration_map = {
    'bedroom': min_sleep_duration,
    'LivingRoom': couch_sleep_min_duration,
    'work_area': 30,
    'other': 30,
    'unknown': 30
}

# Look locations up case-insensitively and fall back to the 'other' threshold.
# A plain .map(duration_map) yields NaN for any location that is not spelled
# exactly like a key, and NaN fails the >= comparison below, so such periods
# were silently discarded.
_min_duration_by_location = {name.lower(): minutes for name, minutes in duration_map.items()}
_fallback_min_duration = duration_map['other']
activity_periods['MinDuration'] = activity_periods['SleepLocation'].map(
    lambda location: _min_duration_by_location.get(str(location).lower(), _fallback_min_duration)
)

start_hour = activity_periods['StartHour']
duration_minutes = activity_periods['Duration']

# Night: starts at or after sleep_start_hour, or at or before sleep_end_hour.
night_time_mask = (start_hour >= sleep_start_hour) | (start_hour <= sleep_end_hour)

# Afternoon nap: starts between 13:00 and 19:59 and lasts at least 60 minutes.
nap_time_mask = start_hour.between(13, 19) & (duration_minutes >= 60)

# Evening couch sleep: starts between 18:00 and 23:59 and lasts at least 90 minutes.
evening_couch_mask = start_hour.between(18, 23) & (duration_minutes >= 90)

time_criteria_mask = night_time_mask | nap_time_mask | evening_couch_mask
duration_criteria_mask = duration_minutes >= activity_periods['MinDuration']

# Filter for sleep periods: both the time-of-day and the duration criteria must hold.
sleep_mask = time_criteria_mask & duration_criteria_mask
sleep_periods = activity_periods[sleep_mask].copy()

if sleep_periods.empty:
    print('no sleep period')
sleep_periods.head()

Unnamed: 0,Date,InactiveStart,InactiveEnd,Duration,Type,StartHour,SleepDurationHours,SleepLocation,MinDuration
424,2014-04-05,2014-04-05 00:08:29.471459,2014-04-05 01:29:48.294037,81.31371,Inactive,0,1.355228,LivingRoom,60
432,2014-04-05,2014-04-05 01:33:13.697524,2014-04-05 06:37:46.316029,304.543642,Inactive,1,5.075727,LivingRoom,60
3140,2014-04-05,2014-04-05 13:17:18.155352,2014-04-05 15:00:58.210138,103.66758,Inactive,13,1.727793,LivingRoom,60
