# Libraries and data

In [1]:
!pip freeze

In [None]:
# Path to the folder
%cd /content/drive/MyDrive/Python - Time Series Forecasting/Modern Time Series Forecasting Techniques /CAPSTONE PROJECT: Prophet

/content/drive/MyDrive/Python - Time Series Forecasting/Modern Time Series Forecasting Techniques /CAPSTONE PROJECT: Prophet


In [None]:
# Import libraries
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from prophet.diagnostics import cross_validation, performance_metrics

In [None]:
# Load the CSV file
df = pd.read_csv('DHS_weekly.csv')
# Rename the columns, date to 'ds' and target variable to 'y'
df.rename(columns={'Date': 'ds', 'Total Individuals in Shelter': 'y'}, inplace=True)
# convert the column 'date' to datetime
df['ds'] = pd.to_datetime(df['ds'])
df.tail()

Unnamed: 0,ds,y,Easter,Thanksgiving,Christmas,Temperature
361,2020-12-06,375444,0,0,0,10.072857
362,2020-12-13,375820,0,0,0,8.208571
363,2020-12-20,375615,0,0,0,3.535714
364,2020-12-27,374203,0,0,1,7.51
365,2021-01-03,212514,0,0,0,6.625


In [None]:
# Prepare holiday dataframe for Easter
holidays = pd.DataFrame({
    'holiday': 'Easter',  # Name of the holiday
    'ds': df['ds'][df['Easter'] == 1],  # Dates of the holiday
    'lower_window': 0,  # Number of days before the holiday to include in the effect
    'upper_window': 1,  # Number of days after the holiday to include in the effect
})

# Loop through other holidays (Thanksgiving and Christmas) to append them to the holidays dataframe
for holiday in ['Thanksgiving', 'Christmas']:
    temp = pd.DataFrame({
        'holiday': holiday,  # Name of the holiday
        'ds': df['ds'][df[holiday] == 1],  # Dates of the holiday
        'lower_window': 0,  # Number of days before the holiday to include in the effect
        'upper_window': 1,  # Number of days after the holiday to include in the effect
    })
    holidays = pd.concat([holidays, temp])  # Concatenate the new holiday dataframe to the existing holidays dataframe

holidays

# Prophet Model

In [None]:
# Assuming the test set is intended to be for the last 60 days in the dataset
max_date = df['ds'].max()  # Get the maximum date in the dataset
split_date = max_date - pd.Timedelta(weeks=13)  # Set the split date 13 weeks (91 days) before the last date

# Split the data into training and testing sets based on the split date
train_df = df[df['ds'] <= split_date]  # Training set includes data up to and including the split date
test_df = df[df['ds'] > split_date]  # Testing set includes data after the split date


In [None]:
# Initialize the Prophet model
model = Prophet(holidays=holidays) # Add the holidays
model.add_regressor('Temperature')  # Adding temperature as a regressor
# Fit the model on the training data
model.fit(train_df)

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpjyzxepva/9vh9xy52.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpjyzxepva/4syecnf6.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23103', 'data', 'file=/tmp/tmpjyzxepva/9vh9xy52.json', 'init=/tmp/tmpjyzxepva/4syecnf6.json', 'output', 'file=/tmp/tmpjyzxepva/prophet_model_zz43osa/prophet_model-20240425150335.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:03:35 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
15:03:35 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x7b1aa5a4de70>

In [None]:
# Create a dataframe for predictions
future_df = model.make_future_dataframe(periods=13, freq='W')  # Generate future dates for 13 weeks

# Include the regressors in the future dataframe
future_df = future_df.merge(df[['ds', 'Temperature']], on='ds', how='left')

# Predict over the future dataframe
forecast = model.predict(future_df)

In [None]:
# Evaluate predictions

# Aligning predicted 'yhat' with the actual 'y' in the test set
test_df = test_df.set_index('ds')  # Set the index of test_df to 'ds' (date)
forecast.set_index('ds', inplace=True)  # Set the index of forecast to 'ds' (date)
forecast = forecast.join(test_df['y'])  # Join the actual 'y' values from the test set to the forecast dataframe

# Calculate mean absolute error
forecast['error'] = (forecast['y'] - forecast['yhat']).abs()  # Calculate the absolute error between actual and predicted values
mae = forecast['error'].mean()  # Calculate the mean of the absolute errors
print(f"Mean Absolute Error: {mae}")  # Print the Mean Absolute Error


Mean Absolute Error: 29485.304917293375


In [None]:
# Visualizing the forecast
from prophet.plot import plot_plotly
plot_plotly(model, forecast.reset_index())

In [None]:
from prophet.plot import plot_plotly, plot_components_plotly
import matplotlib.pyplot as plt

# Plot the components of the forecast
fig_components = plot_components_plotly(model, forecast.reset_index())
fig_components.show()

# PArameter Tuning

In [None]:
from prophet.diagnostics import cross_validation, performance_metrics
import itertools
import numpy as np

In [None]:
# Define all combinations of parameters for grid search
param_grid = {
    'changepoint_prior_scale': [0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.1, 1.0, 10.0],
    'holidays_prior_scale': [0.1, 1.0, 10.0],
    'seasonality_mode': ['additive', 'multiplicative']
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = []  # Store the RMSEs for each params here

In [None]:
# Iterate over all parameter combinations
for params in all_params:
    # Initialize and fit the Prophet model with the given parameters and holidays
    m = Prophet(holidays=holidays, **params).fit(train_df)

    # Perform cross-validation
    df_cv = cross_validation(
        m,
        initial='1500 days',  # Initial training period
        period='42 days',     # Period between cutoff dates
        horizon='91 days',    # Forecast horizon
        parallel="processes"  # Use parallel processing
    )

    # Calculate performance metrics
    df_p = performance_metrics(df_cv, rolling_window=1)

    # Append the first RMSE value to the rmses list
    rmses.append(df_p['rmse'].values[0])


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
DEBUG:cmdstanpy:input tempfile: /tmp/tmpjyzxepva/xbujd62n.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=51900', 'data', 'file=/tmp/tmpjyzxepva/n624b569.json', 'init=/tmp/tmpjyzxepva/xbujd62n.json', 'output', 'file=/tmp/tmpjyzxepva/prophet_modelblky345i/prophet_model-20240425152759.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
15:27:59 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpjyzxepva/36eb8fh1.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpjyzxepva/9grqv2uw.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=85503', 'da

In [None]:
# Find the best parameters

# Get the row of the minimum RMSE value in the rmses list
best_params = all_params[np.argmin(rmses)]
print('Best Parameters:', best_params)

Best Parameters: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 10.0, 'holidays_prior_scale': 10.0, 'seasonality_mode': 'multiplicative'}
