# Exponential Smoothing
## With Grid-Search Hyperparameter Tuning

In [1]:
import pandas as pd

# Load the processed dataset; the first CSV column becomes the index.
# The path is relative to the notebook (the same relative path the
# grid-search cell further down reads from) rather than a machine-specific
# home-directory path, so the notebook can be re-run on another machine.
df = pd.read_csv('../data/processed/final_df.csv', parse_dates=True, index_col=0)

# Ensure the index is in datetime format
df.index = pd.to_datetime(df.index)

# Work on a copy so the frame loaded above stays untouched
df_copy = df.copy()

In [2]:
# Pick one sensor location as a worked example
location = 'Little Collins St-Swanston St (East)'

# Build the modelling frame from the copied dataset in a single chain:
# select the timestamp, the location's counts and the calendar/weather
# columns, rename timestamp -> 'ds' and counts -> 'y' (the naming used for
# Prophet), then make sure 'ds' holds datetimes.
data = (
    df_copy
    .loc[:, ['Hour', location, 'IsPublicHoliday', 'temp', 'humidity', 'rain_1h', 'clouds_all']]
    .rename(columns={'Hour': 'ds', location: 'y'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

In [4]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Define evaluation metrics
def calculate_metrics(y_true, y_pred):
    """Compute RMSE, MAPE (in percent) and R² for a forecast.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, same length as ``y_true``. Values are compared
        positionally; any pandas index on the inputs is ignored.

    Returns
    -------
    tuple
        ``(rmse, mape, r2)``. MAPE is averaged only over observations whose
        true value is non-zero and is NaN when there are none.
    """
    # Plain float arrays: keeps the element-wise maths positional even when a
    # pandas Series (e.g. a statsmodels forecast) is passed in.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # sqrt(MSE) instead of `squared=False`, which newer scikit-learn releases
    # deprecated and then removed; this form works on old and new versions.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    # Mask out zero observations explicitly. Dividing by NaN placeholders and
    # calling .mean() on a NumPy array would propagate NaN instead of
    # excluding those points.
    nonzero = y_true != 0
    if nonzero.any():
        mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
    else:
        mape = np.nan

    r2 = r2_score(y_true, y_pred)
    return rmse, mape, r2

# Read the processed dataset; the first CSV column is used as the index
df = pd.read_csv(
    '../data/processed/final_df.csv',
    index_col=0,
    parse_dates=True,
)
df.index = pd.to_datetime(df.index)  # make sure the index holds datetimes
df_copy = df.copy()  # working copy used by the modelling loop

# Sensor locations that get their own model
locations = [
    'Little Collins St-Swanston St (East)',
    'Faraday St-Lygon St (West)',
    'Melbourne Central',
    'Chinatown-Lt Bourke St (South)',
    'Lonsdale St (South)',
]

# Per-location outputs filled in by the grid search:
# evaluation metrics and the winning hyperparameters
results, best_params = {}, {}

# Search space for Exponential Smoothing
trend_options = [None, 'add', 'mul']     # no trend, additive, multiplicative
seasonal_options = [None, 'add', 'mul']  # no seasonality, additive, multiplicative
seasonal_periods = [24, 168]             # daily (24 h) and weekly (7 * 24 h) cycles

# Process each location: grid-search an Exponential Smoothing configuration,
# persist the best fitted model and record its hold-out metrics.
for location in locations:
    print(f"Processing {location}...")

    # Prepare data for the current location
    data = df_copy[['Hour', location]].rename(columns={'Hour': 'ds', location: 'y'})
    data['ds'] = pd.to_datetime(data['ds'])
    # Keep strictly positive counts only (multiplicative components need
    # strictly positive data).
    # NOTE(review): dropping rows leaves gaps in the hourly series, so the
    # seasonal periods and the "16 days" below count rows, not calendar time
    # - confirm this is intended.
    data = data[data['y'] > 0]

    # Split into training and testing (last 16 * 24 rows for testing)
    split_index = len(data) - 16 * 24
    if split_index <= 0:
        # A non-positive split point would give an empty or wrongly sliced
        # training set; skip instead of fitting on it.
        print(f"Skipping {location}: only {len(data)} usable rows, not enough for a train/test split.")
        continue
    train_data = data.iloc[:split_index]
    test_data = data.iloc[split_index:]

    # Extract target variable
    y_train = train_data['y']
    y_test = test_data['y']

    # Initialize variables to track the best parameters
    best_rmse = float('inf')
    best_model = None
    best_params_location = None

    # Grid search over hyperparameters.
    # NOTE(review): the winner is chosen by RMSE on the same test window whose
    # metrics are reported below, so those metrics are optimistically biased;
    # consider selecting on a separate validation window.
    for trend in trend_options:
        for seasonal in seasonal_options:
            for seasonal_period in seasonal_periods:
                # Without a seasonal component the period is not used by the
                # model, so every period after the first would only refit an
                # identical configuration (which could never win the strict
                # `<` comparison below anyway).
                if seasonal is None and seasonal_period != seasonal_periods[0]:
                    continue
                try:
                    # Initialize and fit the Exponential Smoothing model
                    model = ExponentialSmoothing(
                        y_train,
                        trend=trend,
                        seasonal=seasonal,
                        seasonal_periods=seasonal_period,
                        initialization_method='estimated'
                    )
                    fitted_model = model.fit()

                    # Forecast on the test set
                    y_pred = fitted_model.forecast(steps=len(y_test))

                    # Evaluate performance
                    rmse, mape, r2 = calculate_metrics(y_test.values, y_pred)

                    # Update the best model if the current one is better
                    if rmse < best_rmse:
                        best_rmse = rmse
                        best_model = fitted_model
                        best_params_location = (trend, seasonal, seasonal_period)
                except Exception as e:
                    # Best effort: report the failing combination and keep searching
                    print(f"Error with parameters (trend={trend}, seasonal={seasonal}, seasonal_period={seasonal_period}): {e}")

    if best_model is None:
        # Every combination failed (or produced a non-finite RMSE): there is
        # nothing to save or evaluate, so do not pickle None or crash below.
        print(f"No model could be fitted for {location}; skipping.")
        continue

    # Save the best model and parameters.
    # The filename pattern is kept unchanged so existing artefacts keep their
    # names; note the en-dash replacement does not match any name in
    # `locations` (they use plain hyphens), so hyphens and parentheses remain.
    model_filename = f'ets_model_{location.replace(" ", "_").replace("–", "_")}.pkl'
    joblib.dump(best_model, model_filename)
    best_params[location] = best_params_location

    # Forecast on the test set using the best model
    y_pred = best_model.forecast(steps=len(y_test))

    # Evaluate final performance
    rmse, mape, r2 = calculate_metrics(y_test.values, y_pred)

    # Store the metrics
    results[location] = {'RMSE': rmse, 'MAPE': mape, 'R²': r2}
    print(f"Best parameters for {location}: {best_params_location}")
    print(f"Metrics for {location}: RMSE={rmse}, MAPE={mape}, R²={r2}")

# Persist the evaluation metrics, one row per location
results_df = pd.DataFrame.from_dict(results, orient='index')
results_df.to_csv('ets_evaluation_metrics.csv')
print("Evaluation metrics saved to 'ets_evaluation_metrics.csv'")

# Persist the winning (trend, seasonal, seasonal period) triple per location
best_params_df = pd.DataFrame.from_dict(
    best_params,
    orient='index',
    columns=['Trend', 'Seasonal', 'Seasonal_Period'],
)
best_params_df.to_csv('ets_best_hyperparameters.csv')
print("Best hyperparameters saved to 'ets_best_hyperparameters.csv'")

Processing Little Collins St-Swanston St (East)...


  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_inde

Best parameters for Little Collins St-Swanston St (East): (None, 'add', 168)
Metrics for Little Collins St-Swanston St (East): RMSE=346.6349559920352, MAPE=138.56614016953264, R²=0.6726440471774744
Processing Faraday St-Lygon St (West)...


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index

Best parameters for Faraday St-Lygon St (West): (None, 'add', 168)
Metrics for Faraday St-Lygon St (West): RMSE=163.7136447905953, MAPE=1346.7058169969482, R²=0.2105773684293799
Processing Melbourne Central...


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return err.T @ err
  return get_prediction_index(
  return

Best parameters for Melbourne Central: ('add', 'add', 168)
Metrics for Melbourne Central: RMSE=355.0801734250662, MAPE=142.85741400025037, R²=0.7959163331843025
Processing Chinatown-Lt Bourke St (South)...


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return err.T @ err
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return err.T @ err
  return get_predi

Best parameters for Chinatown-Lt Bourke St (South): ('add', 'add', 24)
Metrics for Chinatown-Lt Bourke St (South): RMSE=199.67139306837186, MAPE=272.03363175461595, R²=0.5543778225196683
Processing Lonsdale St (South)...


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index

Best parameters for Lonsdale St (South): (None, 'add', 168)
Metrics for Lonsdale St (South): RMSE=139.83375615975024, MAPE=44.40059810713685, R²=0.752758943541702
Evaluation metrics saved to 'ets_evaluation_metrics.csv'
Best hyperparameters saved to 'ets_best_hyperparameters.csv'


  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
