# Prediction Model

## Energy Prediction Model

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import scipy
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Load the feature-engineered dataset and index it by its parsed timestamp column
df_fe = pd.read_csv('dataset/HVAC Energy Data Feature Engineered.csv')
timestamp_column = 'Local Time (Timezone : GMT+8h)'
df_fe[timestamp_column] = pd.to_datetime(df_fe[timestamp_column])
df_fe = df_fe.set_index(timestamp_column)

# The positional 80/20 "chronological" splits and the row-based shift used to
# build the forecast target later in this notebook are only meaningful when the
# rows are in time order, so enforce that here. The sort is skipped when the
# index is already ordered; mergesort is stable, so rows that share a timestamp
# keep their original relative order.
if not df_fe.index.is_monotonic_increasing:
    df_fe = df_fe.sort_index(kind='mergesort')

### Prepare data for training

In [None]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import time

# Column the energy model is trained to predict
target = 'Chiller Energy Consumption (kWh)'

# Every column except the target and the raw 'Hour' / 'DayOfWeek' / 'Month'
# columns becomes a model input (the original note says their encoded versions
# are the ones being kept).
exclude_cols = [target, 'Hour', 'DayOfWeek', 'Month']
feature_cols = [name for name in df_fe.columns if name not in exclude_cols]

y = df_fe[target]
X = df_fe[feature_cols]

print(
    f"\n✓ Total samples: {len(X)}",
    f"✓ Number of features: {len(feature_cols)}",
    f"✓ Target variable: {target}",
    sep="\n",
)

# Positional 80/20 split with no shuffling: the first 80% of rows train the
# model and the remaining rows are held out for testing.
split_idx = int(len(X) * 0.8)
X_train, y_train = X.iloc[:split_idx], y.iloc[:split_idx]
X_test, y_test = X.iloc[split_idx:], y.iloc[split_idx:]

print(
    f"\n✓ Training set: {len(X_train)} samples",
    f"✓ Test set: {len(X_test)} samples",
    sep="\n",
)


✓ Total samples: 13609
✓ Number of features: 46
✓ Target variable: Chiller Energy Consumption (kWh)

✓ Training set: 10887 samples
✓ Test set: 2722 samples


In [11]:
# Write the held-out features and target to CSV (index column included) so
# they can be reloaded for later evaluation.
for _split, _csv_path in (
    (X_test, 'dataset/X_test_optimization_engine.csv'),
    (y_test, 'dataset/y_test_optimization_engine.csv'),
):
    _split.to_csv(_csv_path, index=True)

### XGBoost Model

In [4]:
print("\nTraining XGBoost model...")

# Gradient-boosted regressor for the energy target
xgb_model = XGBRegressor(
    n_estimators=200,      # Number of boosting rounds (trees)
    learning_rate=0.1,
    max_depth=6,
    random_state=42,       # Fixed seed so repeated runs are comparable
    n_jobs=-1              # Use all CPU cores
)

# Time only the fit call, so the reported figure matches the "Model trained in"
# message. perf_counter() is a monotonic, high-resolution clock intended for
# measuring durations; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
xgb_model.fit(X_train, y_train)
train_time = time.perf_counter() - start_time

print(f"✓ Model trained in {train_time:.2f} seconds")

# Predictions on both splits; the evaluation cell compares them to spot overfitting
y_train_pred_xgb = xgb_model.predict(X_train)
y_test_pred_xgb = xgb_model.predict(X_test)


Training XGBoost model...
✓ Model trained in 3.67 seconds


### Evaluation Metrics

In [5]:
# Score the energy model on both splits: MAE and RMSE are in the target's
# units, R² is unitless. RMSE is the square root of sklearn's MSE.
train_mae_xgb, test_mae_xgb = (
    mean_absolute_error(y_train, y_train_pred_xgb),
    mean_absolute_error(y_test, y_test_pred_xgb),
)
train_rmse_xgb, test_rmse_xgb = (
    np.sqrt(mean_squared_error(y_train, y_train_pred_xgb)),
    np.sqrt(mean_squared_error(y_test, y_test_pred_xgb)),
)
train_r2_xgb, test_r2_xgb = (
    r2_score(y_train, y_train_pred_xgb),
    r2_score(y_test, y_test_pred_xgb),
)

# Emit the whole report in one call; each list entry is one printed line
print("\n".join([
    "XGBOOST PERFORMANCE METRICS",
    "Training Set:",
    f"  MAE:  {train_mae_xgb:.3f} kWh",
    f"  RMSE: {train_rmse_xgb:.3f} kWh",
    f"  R²:   {train_r2_xgb:.4f}",
    "\nTest Set:",
    f"  MAE:  {test_mae_xgb:.3f} kWh",
    f"  RMSE: {test_rmse_xgb:.3f} kWh",
    f"  R²:   {test_r2_xgb:.4f}",
]))

XGBOOST PERFORMANCE METRICS
Training Set:
  MAE:  0.632 kWh
  RMSE: 0.880 kWh
  R²:   0.9992

Test Set:
  MAE:  1.222 kWh
  RMSE: 2.405 kWh
  R²:   0.9842


## Temperature Forecasting model

### Prepare data

In [7]:
# Forecast target: the cooling-water temperature `forecast_horizon` rows ahead.
# NOTE(review): the "2 steps = 1 hour" equivalence presumes rows are evenly
# spaced 30 minutes apart with no gaps; the shift is row-based, not time-based.
# TODO confirm against the dataset.
forecast_horizon = 2
temp_target = 'Cooling Water Temperature (C)'

# Build the supervised frame on a new object (df_fe is left untouched): attach
# the shifted target, then drop the trailing rows that have no future value.
df_temp = (
    df_fe
    .assign(Future_Temp=df_fe[temp_target].shift(-forecast_horizon))
    .dropna(subset=['Future_Temp'])
)

print(
    f"\n✓ Forecast horizon: {forecast_horizon} time steps (1 hour)",
    f"✓ Target: Future {temp_target}",
    f"✓ Dataset size after creating target: {len(df_temp)} samples",
    sep="\n",
)

# Inputs are every column except the shifted target, the current value of the
# forecast variable, and the raw 'Hour' / 'DayOfWeek' / 'Month' columns.
temp_exclude = ['Future_Temp', temp_target, 'Hour', 'DayOfWeek', 'Month']
temp_features = [name for name in df_temp.columns if name not in temp_exclude]

y_temp = df_temp['Future_Temp']
X_temp = df_temp[temp_features]

print(f"\n✓ Number of features: {len(temp_features)}")

# Positional 80/20 split with no shuffling (first 80% of rows train, rest test).
# This reassigns the notebook-level `split_idx`.
split_idx = int(len(X_temp) * 0.8)
X_train_temp, y_train_temp = X_temp.iloc[:split_idx], y_temp.iloc[:split_idx]
X_test_temp, y_test_temp = X_temp.iloc[split_idx:], y_temp.iloc[split_idx:]

print(
    f"\n✓ Training set: {len(X_train_temp)} samples",
    f"✓ Test set: {len(X_test_temp)} samples",
    sep="\n",
)


✓ Forecast horizon: 2 time steps (1 hour)
✓ Target: Future Cooling Water Temperature (C)
✓ Dataset size after creating target: 13607 samples

✓ Number of features: 46

✓ Training set: 10885 samples
✓ Test set: 2722 samples


### XGBoost Model

In [8]:
import time
import xgboost as xgb

# Gradient-boosted regressor for the shifted (future) temperature target
temp_model = xgb.XGBRegressor(
    n_estimators=150,      # Number of boosting rounds (trees)
    learning_rate=0.1,
    max_depth=5,
    random_state=42,       # Fixed seed so repeated runs are comparable
    n_jobs=-1              # Use all CPU cores
)

# Time the fit call with perf_counter(): it is a monotonic, high-resolution
# clock intended for measuring durations, whereas time.time() can jump if the
# system clock is adjusted mid-run.
start_time = time.perf_counter()
temp_model.fit(X_train_temp, y_train_temp)
training_time = time.perf_counter() - start_time

print(f"\n✓ Model training completed in {training_time:.2f} seconds")

# Predictions on both splits, consumed by the evaluation cell
y_train_pred_temp = temp_model.predict(X_train_temp)
y_test_pred_temp = temp_model.predict(X_test_temp)


✓ Model training completed in 2.11 seconds


### Evaluation metrics

In [9]:
# Score the temperature forecaster on both splits. RMSE is the square root of
# sklearn's MSE; MAE and RMSE share the target's units.
train_mae_temp, test_mae_temp = (
    mean_absolute_error(y_train_temp, y_train_pred_temp),
    mean_absolute_error(y_test_temp, y_test_pred_temp),
)
train_rmse_temp, test_rmse_temp = (
    np.sqrt(mean_squared_error(y_train_temp, y_train_pred_temp)),
    np.sqrt(mean_squared_error(y_test_temp, y_test_pred_temp)),
)
train_r2_temp, test_r2_temp = (
    r2_score(y_train_temp, y_train_pred_temp),
    r2_score(y_test_temp, y_test_pred_temp),
)

# Each list entry is one printed line; embedded "\n" characters reproduce the
# blank lines of the report layout.
print("\n".join([
    "TEMPERATURE FORECASTING MODEL PERFORMANCE\n",
    "\nTRAINING SET PERFORMANCE:",
    f"  • MAE (Temperature):  {train_mae_temp:.3f} °C",
    f"  • RMSE (Temperature): {train_rmse_temp:.3f} °C",
    f"  • R² Score:           {train_r2_temp:.4f}",
    "\nTEST SET PERFORMANCE:",
    f"  • MAE (Temperature):  {test_mae_temp:.3f} °C",
    f"  • RMSE (Temperature): {test_rmse_temp:.3f} °C",
    f"  • R² Score:           {test_r2_temp:.4f}",
]))

# Share of test predictions whose absolute error falls inside each tolerance band
temp_diff = np.abs(y_test_pred_temp - y_test_temp)
accuracy_1C = (temp_diff <= 1.0).mean() * 100
accuracy_05C = (temp_diff <= 0.5).mean() * 100

print("\n".join([
    "TEMPERATURE PREDICTION ACCURACY (TOLERANCE BANDS)",
    f"\n✓ Predictions within ±1.0°C:  {accuracy_1C:.2f}%",
    f"✓ Predictions within ±0.5°C:  {accuracy_05C:.2f}%",
]))

TEMPERATURE FORECASTING MODEL PERFORMANCE


TRAINING SET PERFORMANCE:
  • MAE (Temperature):  0.331 °C
  • RMSE (Temperature): 0.459 °C
  • R² Score:           0.8615

TEST SET PERFORMANCE:
  • MAE (Temperature):  0.460 °C
  • RMSE (Temperature): 0.655 °C
  • R² Score:           0.6853
TEMPERATURE PREDICTION ACCURACY (TOLERANCE BANDS)

✓ Predictions within ±1.0°C:  89.24%
✓ Predictions within ±0.5°C:  66.90%


## Saving Trained models

In [10]:
from pathlib import Path

import joblib

# Destination files for the two fitted models (energy and temperature)
model_filename = 'trained_models/xgboost_energy_model.pkl'
model_filename_ = 'trained_models/xgboost_temperature_model.pkl'

# joblib.dump does not create missing directories, so make sure the output
# folder exists; otherwise this cell fails on a fresh checkout.
Path(model_filename).parent.mkdir(parents=True, exist_ok=True)
Path(model_filename_).parent.mkdir(parents=True, exist_ok=True)

# Serialize both trained XGBoost regressors.
# NOTE(review): joblib files are pickle-based; load them only from trusted
# sources, and presumably with a compatible xgboost version. Verify when
# upgrading.
joblib.dump(xgb_model, model_filename)
joblib.dump(temp_model, model_filename_)

print(f"XGBoost models saved successfully to '{model_filename}' and '{model_filename_}'")

XGBoost models saved successfully to 'trained_models/xgboost_energy_model.pkl' and 'trained_models/xgboost_temperature_model.pkl'
