# Smart Home Appliance Usage Forecasting
## 04 - Forecasting Model (ARIMA)

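This notebook builds a time-series forecasting model for daily appliance usage using seasonal ARIMA: `auto_arima` selects the model orders, and a `SARIMAX` model with those orders produces the forecast that is evaluated on a held-out test period.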

In [None]:
# Install pmdarima if not already installed
# !pip install pmdarima

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values. May be a scalar or any shape that broadcasts
        against ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / max(|y_true|, 1e-8)) * 100``.
        ``nan`` when there are no elements to average.

    Notes
    -----
    The denominator is clipped at ``1e-8`` so zero actuals do not divide by
    zero; a non-zero error on a zero actual therefore yields a very large
    percentage.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Same element count but different shapes (e.g. (n, 1) vs (n,)) would
    # broadcast to an n-by-n matrix and average the wrong pairs; flatten so
    # the values are compared element by element instead.
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true, y_pred = y_true.ravel(), y_pred.ravel()

    denominator = np.clip(np.abs(y_true), 1e-8, None)
    pct_errors = np.abs((y_true - y_pred) / denominator)

    # np.mean of an empty array warns and returns nan; return nan directly.
    if pct_errors.size == 0:
        return np.nan
    return np.mean(pct_errors) * 100

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

## Load Feature-Enhanced Data

In [None]:
# Read the feature-enhanced log file; the `timestamp` column is parsed into
# datetimes at load time so it can be resampled later.
df = pd.read_csv(
    '../data/processed/iot_logs_features.csv',
    parse_dates=['timestamp'],
)
df.head()

## Select Appliance and Prepare Data for ARIMA

We will forecast daily usage for a single appliance and a single user (here: Air Conditioner, user 101).

In [None]:
appliance = 'Air Conditioner'
user_id = 101

# Keep only the rows for the chosen appliance/user pair.
df_appliance = df[(df['appliance_name'] == appliance) & (df['user_id'] == user_id)]

# Stop here with a clear message rather than failing later inside the model fit.
assert not df_appliance.empty, (
    f'No rows found for appliance={appliance!r}, user_id={user_id!r}'
)

# Sum `usage` per calendar day, indexed by `timestamp`. Selecting the column as
# a frame avoids the reset_index()/set_index() round trip, which rebuilt the
# index from a plain column and dropped its daily frequency.
df_daily = df_appliance.resample('D', on='timestamp')[['usage']].sum()
df_daily.head()

## Train/Test Split

In [None]:
# Hold out the last `test_days` daily observations as the test set; everything
# before them is used for training.
test_days = 7

# With `test_days` rows or fewer, the training slice below would be empty.
assert len(df_daily) > test_days, (
    f'Need more than {test_days} daily observations, got {len(df_daily)}'
)

train = df_daily.iloc[:-test_days]
test = df_daily.iloc[-test_days:]
print(f"Train: {train.shape}, Test: {test.shape}")

## Auto ARIMA Model Selection

In [None]:
# Let auto_arima search for the model orders on the training series.
# seasonal=True with m=7 asks for a seasonal period of 7 observations
# (one week on this daily series); trace=True prints each candidate tried.
stepwise_model = auto_arima(
    train['usage'],
    seasonal=True,
    m=7,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
print(stepwise_model.summary())

## Fit SARIMAX Model and Forecast

In [None]:
# Refit the orders selected by auto_arima using statsmodels' SARIMAX.
# NOTE(review): stationarity/invertibility enforcement is switched off here, so
# the fitted parameters may differ from those in the auto_arima summary —
# confirm this is intended.
model = SARIMAX(
    train['usage'],
    order=stepwise_model.order,
    seasonal_order=stepwise_model.seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
model_fit = model.fit(disp=False)

# Forecast one step per test observation. Tying the horizon to len(test)
# (instead of a literal 7) keeps the forecast and the test index the same
# length if the split size is changed.
forecast = model_fit.get_forecast(steps=len(test))
forecast_index = test.index
forecast_values = forecast.predicted_mean
forecast_ci = forecast.conf_int()

# Prepare for plotting: place the predicted values on the test dates so they
# line up with the actuals.
forecast_series = pd.Series(forecast_values.values, index=forecast_index)

## Visualize Forecast vs. Actuals

In [None]:
# One figure: the full daily series, the forecast over the test dates, the
# shaded test window, and the band between the two confidence-interval columns.
fig, ax = plt.subplots(figsize=(12, 6))

ax.plot(df_daily.index, df_daily['usage'], label='Actual')
ax.plot(
    forecast_series.index,
    forecast_series.values,
    label='ARIMA Forecast',
    color='red',
)
ax.axvspan(
    test.index.min(),
    test.index.max(),
    color='orange',
    alpha=0.1,
    label='Test Period',
)
# Columns 0 and 1 of forecast_ci are presumably the lower/upper bounds — confirm.
ax.fill_between(
    forecast_index,
    forecast_ci.iloc[:, 0],
    forecast_ci.iloc[:, 1],
    color='pink',
    alpha=0.3,
    label='Confidence Interval',
)

ax.legend()
ax.set_title(f'ARIMA Forecast for {appliance} (User {user_id})')
ax.set_xlabel('Date')
ax.set_ylabel('Usage (kWh)')
plt.show()

## Model Evaluation (MAE, RMSE, MAPE) on Test Set

In [None]:
# Score the forecast against the held-out actuals.
mae = mean_absolute_error(test['usage'], forecast_series)
# Take the square root of the MSE explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(test['usage'], forecast_series))
mape = mean_absolute_percentage_error(test['usage'], forecast_series)

print(f'MAE: {mae:.3f}')
print(f'RMSE: {rmse:.3f}')
print(f'MAPE: {mape:.2f}%')