# Building a Multivariate Prophet Model with Rolling-Mean and Lag (Shift) Features

In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
import pandas as pd

# Load the processed dataset; the first CSV column becomes the index.
DATA_PATH = '~/Desktop/DATA_PROJECT/HSG_BA_and_DS_Applications/data/processed/final_df.csv'
df = pd.read_csv(DATA_PATH, index_col=0, parse_dates=True)

# Coerce the index to datetimes explicitly, independent of what the CSV
# parser inferred for it.
df.index = pd.to_datetime(df.index)

# Keep an independent copy alongside the frame used for modelling.
df_copy = df.copy()

# Last expression of the cell: displays the available column names.
df_copy.columns

Index(['Hour', 'Little Collins St-Swanston St (East)',
       'Faraday St-Lygon St (West)', 'Melbourne Central',
       'Chinatown-Lt Bourke St (South)', 'Lonsdale St (South)',
       'Lygon St (West)', 'IsPublicHoliday', 'temp', 'humidity', 'rain_1h',
       'clouds_all', 'Weekday_2', 'Weekday_3', 'Weekday_4', 'Weekday_5',
       'Weekday_6', 'Weekday_7', 'Month_2', 'Month_3', 'Month_4', 'Month_5',
       'Month_6', 'Month_7', 'Month_8', 'Month_9', 'Month_10', 'Month_11',
       'Month_12', 'Season_Spring', 'Season_Summer', 'Season_Winter'],
      dtype='object')

## Evaluation Metrics (RMSE, MAPE, R²)

In [None]:
def calculate_metrics(y_true, y_pred):
    """Return RMSE, MAPE (in percent) and R² for a set of predictions.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, aligned element-wise with
            ``y_true``.

    Returns:
        A ``(rmse, mape, r2)`` tuple. MAPE is averaged only over the
        observations whose true value is non-zero; it is NaN when every
        true value is zero.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Take the square root of the MSE ourselves instead of passing
    # ``squared=False``: that keyword was deprecated in scikit-learn 1.4 and
    # removed in later releases, while this form works on every version.
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))

    # Percentage error is undefined where the true value is 0, so those
    # observations are left out of the average. (A plain ``.mean()`` over an
    # array containing NaN placeholders would make the whole result NaN.)
    nonzero = y_true != 0
    if nonzero.any():
        relative_error = np.abs(
            (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
        )
        mape = float(relative_error.mean() * 100)
    else:
        mape = float('nan')

    r2 = r2_score(y_true, y_pred)
    return rmse, mape, r2

## Define the locations and the external regressors

In [None]:
# Location columns to forecast, one model per entry
locations = [
    'Little Collins St-Swanston St (East)',
    'Faraday St-Lygon St (West)',
    'Melbourne Central',
    'Chinatown-Lt Bourke St (South)',
    'Lonsdale St (South)',
]

# Columns passed to the model as external regressors: three fixed columns
# followed by Weekday_2 ... Weekday_7
external_regressors = ['Hour', 'temp', 'rain_1h']
external_regressors.extend(f'Weekday_{day}' for day in range(2, 8))

# Filled in later, keyed by location name
results = dict()

## Model loop for all five locations
##### Drop rows with empty values (almost none)
##### Add the rolling average and lag features (External regressors)
##### Train and test the model, then save it for the Streamlit application

In [None]:
# Every model gets the shared external regressors plus two features derived
# from the target's own history. Built once so that fitting and prediction
# are guaranteed to use the same column list.
feature_columns = external_regressors + ['y_lag_1', 'y_roll_24']

# Size of the hold-out set: the last 16 days, assuming one row per hour.
test_rows = 16 * 24

for location in locations:
    print(f"Processing {location}...")

    # Prepare data for the current location in the layout Prophet expects:
    # target column 'y', timestamp column 'ds'.
    data = df[[location] + external_regressors].rename(columns={location: 'y'})
    data['ds'] = df.index

    # Remove rows with missing values
    data = data.dropna()

    # Lag feature: the previous row's target value.
    data['y_lag_1'] = data['y'].shift(1)
    # Rolling mean of the 24 rows *before* the current one. The shift is
    # required: a window that includes the current row would put the value
    # being predicted into its own regressor (target leakage).
    data['y_roll_24'] = data['y'].shift(1).rolling(window=24).mean()
    # The first rows have no complete history for the features above.
    data = data.dropna()

    # Chronological split: everything but the last `test_rows` rows trains
    # the model, the remainder is used for evaluation.
    split_index = len(data) - test_rows
    train_data = data.iloc[:split_index]
    test_data = data.iloc[split_index:]

    # Initialize the Prophet model and register every regressor column
    model = Prophet(daily_seasonality=True, weekly_seasonality=True)
    for regressor in feature_columns:
        model.add_regressor(regressor)

    # Fit the model
    model.fit(train_data)

    # The "future" frame is the test period's timestamps together with the
    # regressor values observed for those rows.
    future = test_data[['ds'] + feature_columns].copy()

    # Predict
    forecast = model.predict(future)

    # Extract predictions
    y_true = test_data['y'].values
    y_pred = forecast['yhat'].values

    # Calculate metrics
    rmse, mape, r2 = calculate_metrics(y_true, y_pred)
    results[location] = {'RMSE': rmse, 'MAPE': mape, 'R²': r2}
    print(f"Metrics for {location}: RMSE={rmse}, MAPE={mape}, R²={r2}")

    # Save the fitted model for this location. This has to happen inside the
    # loop (one file per location) and must dump `model`, the object that
    # was just trained.
    # NOTE(review): 'mondel' looks like a typo for 'model'; the name is kept
    # as-is because whatever loads these files may rely on it — confirm
    # before renaming.
    model_filename = f'prophet_mondel_{location.replace(" ", "_").replace("-", "_")}.pkl'
    joblib.dump(model, model_filename)
    print(f"Model for {location} saved as {model_filename}")

# Save the metrics of all locations to CSV (one row per location)
results_df = pd.DataFrame.from_dict(results, orient='index')
results_df.to_csv('prophet_with_lags_and_rolling_results.csv')