# Prophet Building for Multivariate 

In [3]:
import pandas as pd

# Load the CSV file
df = pd.read_csv(
    '~/Desktop/DATA_PROJECT/HSG_BA_and_DS_Applications/data/processed/final_df.csv',
    parse_dates=True,
    index_col=0
)

# Ensure the index is in datetime format
df.index = pd.to_datetime(df.index)
df_copy = df.copy()

df_copy.columns

Index(['Hour', 'Little Collins St-Swanston St (East)',
       'Faraday St-Lygon St (West)', 'Melbourne Central',
       'Chinatown-Lt Bourke St (South)', 'Lonsdale St (South)',
       'Lygon St (West)', 'IsPublicHoliday', 'temp', 'humidity', 'rain_1h',
       'clouds_all', 'Weekday_2', 'Weekday_3', 'Weekday_4', 'Weekday_5',
       'Weekday_6', 'Weekday_7', 'Month_2', 'Month_3', 'Month_4', 'Month_5',
       'Month_6', 'Month_7', 'Month_8', 'Month_9', 'Month_10', 'Month_11',
       'Month_12', 'Season_Spring', 'Season_Summer', 'Season_Winter'],
      dtype='object')

In [2]:
# Example for one location
location = 'Little Collins St-Swanston St (East)'

# Prepare the data for Prophet (from the copied dataset)
data = df_copy[['Hour', location, 'IsPublicHoliday', 'temp', 'humidity', 'rain_1h', 'clouds_all']].rename(
    columns={'Hour': 'ds', location: 'y'}
)

# Convert 'ds' to datetime format
data['ds'] = pd.to_datetime(data['ds'])

In [22]:
import numpy as np
from prophet import Prophet
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Define evaluation metrics
def calculate_metrics(y_true, y_pred):
    rmse = mean_squared_error(y_true, y_pred, squared=False)
    y_true_nonzero = np.where(y_true == 0, np.nan, y_true)  # Avoid divide-by-zero for MAPE
    mape = (np.abs((y_true - y_pred) / y_true_nonzero)).mean() * 100  # Exclude NaNs
    r2 = r2_score(y_true, y_pred)
    return rmse, mape, r2


# Load the dataset
df = pd.read_csv('~/Desktop/DATA_PROJECT/HSG_BA_and_DS_Applications/data/processed/final_df.csv', parse_dates=True, index_col=0)
df.index = pd.to_datetime(df.index)
df_copy = df.copy()

# Locations to process
locations = [
    'Little Collins St-Swanston St (East)',
    'Faraday St-Lygon St (West)',
    'Melbourne Central',
    'Chinatown-Lt Bourke St (South)',
    'Lonsdale St (South)'
]

results = {}

for location in locations:
    print(f"Processing {location}...")

    # Prepare data for the current location
    data = df_copy[['Hour', location, 'IsPublicHoliday', 'temp', 'humidity', 'rain_1h', 'clouds_all']].rename(
        columns={'Hour': 'ds', location: 'y'}
    )
    data['ds'] = pd.to_datetime(data['ds'])
    data = data[data['y'] > 0]  # Remove zero counts for stability

    # Log-transform the target variable
    data['y'] = np.log(data['y'] + 1)

    # Split into training and testing (last 16 days for testing)
    split_index = int(len(data) - 16 * 24)
    train_data = data.iloc[:split_index]
    test_data = data.iloc[split_index:]

    # Initialize Prophet with default settings
    model = Prophet()
    model.add_regressor('IsPublicHoliday')
    model.add_regressor('temp')
    model.add_regressor('humidity')
    model.add_regressor('rain_1h')
    model.add_regressor('clouds_all')

    # Train the model
    model.fit(train_data)

    # Predict on the test set
    future = test_data[['ds', 'IsPublicHoliday', 'temp', 'humidity', 'rain_1h', 'clouds_all']]
    forecast = model.predict(future)

    # Inverse transform predictions back to original scale
    forecast['yhat'] = np.exp(forecast['yhat']) - 1

    # Evaluate performance
    y_true = np.exp(test_data['y'].values) - 1  # Inverse-transform true values
    y_pred = forecast['yhat'].values
    rmse, mape, r2 = calculate_metrics(y_true, y_pred)

    # Save the trained model
    model_filename = f'log_transformed_multivariate_model_{location.replace(" ", "_").replace("–", "_")}.pkl'
    joblib.dump(model, model_filename)
    print(f"Log-transformed multivariate model for {location} saved!")

    # Store the metrics
    results[location] = {'RMSE': rmse, 'MAPE': mape, 'R²': r2}
    print(f"Metrics for {location}: RMSE={rmse}, MAPE={mape}, R²={r2}")

# Save the results to a CSV file
results_df = pd.DataFrame.from_dict(results, orient='index')
results_df.to_csv('log_transformed_multivariate_evaluation_metrics.csv', index=True)
print("Evaluation metrics for log-transformed multivariate Prophet saved to 'log_transformed_multivariate_evaluation_metrics.csv'")

23:13:54 - cmdstanpy - INFO - Chain [1] start processing


Processing Little Collins St-Swanston St (East)...


23:13:57 - cmdstanpy - INFO - Chain [1] done processing
23:13:58 - cmdstanpy - INFO - Chain [1] start processing


Log-transformed multivariate model for Little Collins St-Swanston St (East) saved!
Metrics for Little Collins St-Swanston St (East): RMSE=794.5802379887111, MAPE=517.2968812527722, R²=-0.7200895159410721
Processing Faraday St-Lygon St (West)...


23:14:00 - cmdstanpy - INFO - Chain [1] done processing
23:14:00 - cmdstanpy - INFO - Chain [1] start processing


Log-transformed multivariate model for Faraday St-Lygon St (West) saved!
Metrics for Faraday St-Lygon St (West): RMSE=261.4461828647679, MAPE=1301.8119530139513, R²=-1.0132811576102099
Processing Melbourne Central...


23:14:03 - cmdstanpy - INFO - Chain [1] done processing
23:14:03 - cmdstanpy - INFO - Chain [1] start processing


Log-transformed multivariate model for Melbourne Central saved!
Metrics for Melbourne Central: RMSE=1161.7060323692556, MAPE=607.8886577023642, R²=-1.1844790857568563
Processing Chinatown-Lt Bourke St (South)...


23:14:05 - cmdstanpy - INFO - Chain [1] done processing
23:14:05 - cmdstanpy - INFO - Chain [1] start processing


Log-transformed multivariate model for Chinatown-Lt Bourke St (South) saved!
Metrics for Chinatown-Lt Bourke St (South): RMSE=400.9053091144363, MAPE=688.5432676743274, R²=-0.7964648151625966
Processing Lonsdale St (South)...


23:14:08 - cmdstanpy - INFO - Chain [1] done processing


Log-transformed multivariate model for Lonsdale St (South) saved!
Metrics for Lonsdale St (South): RMSE=401.26157452646555, MAPE=286.2866504270227, R²=-1.0358777184028685
Evaluation metrics for log-transformed multivariate Prophet saved to 'log_transformed_multivariate_evaluation_metrics.csv'


