# Smart Home Appliance Usage Forecasting
## 02 - Feature Engineering

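This notebook engineers time-based, lag, and rolling-window features (plus daily weather, when a weather file is available) from the IoT appliance usage logs and saves the result for the forecasting models.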

In [121]:
# Imports: standard library first, then third-party
import os

import numpy as np
import pandas as pd

# Make sure the folder that the optional weather file is read from exists,
# so it can be dropped in without creating the directory by hand.
os.makedirs('../data/external', exist_ok=True)

## Load Data

In [122]:
# Read the raw IoT log, parsing `timestamp` on load, and order the rows chronologically
raw_logs_path = '../data/raw/synthetic_iot_logs.csv'
df = pd.read_csv(raw_logs_path, parse_dates=['timestamp']).sort_values('timestamp')
df.head()

Unnamed: 0,timestamp,appliance_id,appliance_name,usage,user_id
0,2025-05-29 23:35:52.423050,1,Air Conditioner,0.874,101
37453,2025-05-29 23:35:52.423050,3,Refrigerator,0.079,104
5762,2025-05-29 23:35:52.423050,1,Air Conditioner,0.739,103
31691,2025-05-29 23:35:52.423050,3,Refrigerator,0.062,102
11524,2025-05-29 23:35:52.423050,1,Air Conditioner,0.775,105


In [123]:
# Load weather data if available and attach it to every IoT reading by calendar date
weather_path = '../data/external/weather_data.csv'


def load_daily_weather(path):
    """Read the weather CSV at `path` and return it with a `date` column.

    The `date` column holds `datetime.date` values derived from the first of
    the columns `dt`, `timestamp`, `date` that is present in the file.

    Raises:
        ValueError: if the file has none of those three columns.
    """
    # Read without `parse_dates=['date']`: read_csv raises when that column is
    # missing, which made the 'dt' / 'timestamp' fallbacks unreachable.
    weather = pd.read_csv(path)
    source_col = next(
        (col for col in ('dt', 'timestamp', 'date') if col in weather.columns),
        None,
    )
    if source_col is None:
        raise ValueError("Weather data needs a 'dt', 'timestamp' or 'date' column")
    weather['date'] = pd.to_datetime(weather[source_col]).dt.date
    # A weather-side 'timestamp' would collide with the IoT 'timestamp' in the
    # merge (pandas renames both to timestamp_x / timestamp_y), so drop it once
    # the date has been extracted.
    return weather.drop(columns='timestamp', errors='ignore')


def merge_weather(iot, weather):
    """Left-join `weather` onto `iot` by calendar date and return the new frame.

    `iot` must have a datetime `timestamp` column and `weather` a `date`
    column of `datetime.date` values. The result has one row per `iot` row;
    readings on a date without weather get NaN in the weather columns.

    Raises:
        pandas.errors.MergeError: if `weather` has more than one row for a date.
    """
    iot = iot.assign(date=iot['timestamp'].dt.date)
    # many_to_one: fail loudly on duplicate weather dates instead of silently
    # duplicating IoT rows, which would corrupt the row-based lag features.
    return iot.merge(weather, on='date', how='left', validate='many_to_one')


if os.path.exists(weather_path):
    weather = load_daily_weather(weather_path)
    df = merge_weather(df, weather)
    print("Weather data merged. Columns now:", df.columns)
else:
    print("No weather data found at", weather_path)

Weather data merged. Columns now: Index(['timestamp', 'appliance_id', 'appliance_name', 'usage', 'user_id',
       'date', 'temp', 'temp_min', 'temp_max', 'precipitation', 'wind_speed'],
      dtype='object')


## Time-based Features

In [124]:
# Calendar features derived from the reading timestamp
timestamp_parts = df['timestamp'].dt
df = df.assign(
    hour=timestamp_parts.hour,
    dayofweek=timestamp_parts.dayofweek,        # pandas convention: Monday=0 ... Sunday=6
    is_weekend=timestamp_parts.dayofweek >= 5,  # True for Saturday (5) and Sunday (6)
    month=timestamp_parts.month,
)
df.head()

Unnamed: 0,timestamp,appliance_id,appliance_name,usage,user_id,date,temp,temp_min,temp_max,precipitation,wind_speed,hour,dayofweek,is_weekend,month
0,2025-05-29 23:35:52.423050,1,Air Conditioner,0.874,101,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5
1,2025-05-29 23:35:52.423050,3,Refrigerator,0.079,104,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5
2,2025-05-29 23:35:52.423050,1,Air Conditioner,0.739,103,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5
3,2025-05-29 23:35:52.423050,3,Refrigerator,0.062,102,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5
4,2025-05-29 23:35:52.423050,1,Air Conditioner,0.775,105,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5


## Lag Features (previous usage)

In [125]:
# Previous usage values for the same appliance and user, taken by row offset.
# Offsets count rows, not elapsed time; the 1-hour / 1-day labels follow the
# original annotations and assume 15-minute sampling without gaps (TODO confirm).
usage_per_device = df.groupby(['appliance_id', 'user_id'])['usage']
lag_steps = {
    'usage_lag_1': 1,    # previous reading
    'usage_lag_4': 4,    # 1 hour ago (4x15min)
    'usage_lag_96': 96,  # 1 day ago
}
for lag_col, steps in lag_steps.items():
    df[lag_col] = usage_per_device.shift(steps)
df.head(10)

Unnamed: 0,timestamp,appliance_id,appliance_name,usage,user_id,date,temp,temp_min,temp_max,precipitation,wind_speed,hour,dayofweek,is_weekend,month,usage_lag_1,usage_lag_4,usage_lag_96
0,2025-05-29 23:35:52.423050,1,Air Conditioner,0.874,101,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
1,2025-05-29 23:35:52.423050,3,Refrigerator,0.079,104,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
2,2025-05-29 23:35:52.423050,1,Air Conditioner,0.739,103,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
3,2025-05-29 23:35:52.423050,3,Refrigerator,0.062,102,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
4,2025-05-29 23:35:52.423050,1,Air Conditioner,0.775,105,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
5,2025-05-29 23:35:52.423050,2,Washing Machine,0.0,105,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
6,2025-05-29 23:35:52.423050,2,Washing Machine,0.434,102,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
7,2025-05-29 23:35:52.423050,2,Washing Machine,0.0,103,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
8,2025-05-29 23:35:52.423050,2,Washing Machine,0.0,101,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,
9,2025-05-29 23:35:52.423050,3,Refrigerator,0.07,101,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,


## Rolling Statistics

In [126]:
# Trailing-window statistics of usage, computed separately per appliance and user.
# min_periods=1 means the first rows of each group use however many readings exist.
# NOTE(review): both windows include the current row's own usage. If `usage` on the
# same row is the forecasting target, these features leak it -- confirm against the
# modelling notebook and consider shifting by one row before rolling.
def trailing_mean_4(usage):
    """Mean of the current and up to 3 preceding readings."""
    return usage.rolling(window=4, min_periods=1).mean()


def trailing_sum_96(usage):
    """Sum of the current and up to 95 preceding readings."""
    return usage.rolling(window=96, min_periods=1).sum()


device_usage = df.groupby(['appliance_id', 'user_id'])['usage']
df['usage_roll_mean_4'] = device_usage.transform(trailing_mean_4)
df['usage_roll_sum_96'] = device_usage.transform(trailing_sum_96)
df.head(10)

Unnamed: 0,timestamp,appliance_id,appliance_name,usage,user_id,date,temp,temp_min,temp_max,precipitation,wind_speed,hour,dayofweek,is_weekend,month,usage_lag_1,usage_lag_4,usage_lag_96,usage_roll_mean_4,usage_roll_sum_96
0,2025-05-29 23:35:52.423050,1,Air Conditioner,0.874,101,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.874,0.874
1,2025-05-29 23:35:52.423050,3,Refrigerator,0.079,104,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.079,0.079
2,2025-05-29 23:35:52.423050,1,Air Conditioner,0.739,103,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.739,0.739
3,2025-05-29 23:35:52.423050,3,Refrigerator,0.062,102,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.062,0.062
4,2025-05-29 23:35:52.423050,1,Air Conditioner,0.775,105,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.775,0.775
5,2025-05-29 23:35:52.423050,2,Washing Machine,0.0,105,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.0,0.0
6,2025-05-29 23:35:52.423050,2,Washing Machine,0.434,102,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.434,0.434
7,2025-05-29 23:35:52.423050,2,Washing Machine,0.0,103,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.0,0.0
8,2025-05-29 23:35:52.423050,2,Washing Machine,0.0,101,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.0,0.0
9,2025-05-29 23:35:52.423050,3,Refrigerator,0.07,101,2025-05-29,33.5,29.4,38.7,0.0,6.5,23,3,False,5,,,,0.07,0.07


In [127]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [128]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [129]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [130]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [131]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [132]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [133]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [134]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [135]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [136]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [137]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [138]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [139]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [140]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [141]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [142]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [143]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [144]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [145]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


## Save Feature-Enhanced Data

In [146]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [147]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [148]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv


In [149]:
# Write the feature-enhanced frame to disk. to_csv does not create missing
# folders, so make sure the target directory exists first.
# NOTE(review): this save cell is repeated many times in the notebook; one copy is enough.
features_path = '../data/processed/iot_logs_features.csv'
os.makedirs(os.path.dirname(features_path), exist_ok=True)
df.to_csv(features_path, index=False)
print('Feature-enhanced data saved to', features_path)

Feature-enhanced data saved to ../data/processed/iot_logs_features.csv
