# Neural Prophet model

Overall process:
- Fit a basic NeuralProphet model using the daily sales value only
- Fit a NeuralProphet model using the daily sales value plus total daily customers, daily purchase amount per quantity, and a weekend flag

Packages:
1. pandas
2. numpy
3. neuralprophet
4. scikit-learn
5. matplotlib
6. Snowflake Snowpark (provides the `session` used to load the data)

In [None]:
import time
from neuralprophet import NeuralProphet
from neuralprophet import set_log_level
from sklearn.metrics import mean_absolute_percentage_error as MAPE_metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Silence NeuralProphet log output below the ERROR level.
set_log_level("ERROR")

# Forecast horizon: 14 days.
# Look-back window: 4x the horizon = 56 days (8 weeks).
TEST_SIZE = 14
TRAIN_SIZE = 4 * TEST_SIZE

# NOTE(review): get_active_session is not imported anywhere in this file --
# presumably it comes from snowflake.snowpark.context or is injected by the
# notebook runtime; confirm before running outside that environment.
session = get_active_session()
session.use_database("ml")
session.use_schema("retail_store")

# Load the table into pandas and drop the STORE_CHAIN_ID column.
transactions_df = (
    session.table("store_2_preprocessed_transactions")
    .to_pandas()
    .drop(columns="STORE_CHAIN_ID")
)

# Parse DATE as datetimes and order the rows chronologically.
transactions_df["DATE"] = pd.to_datetime(transactions_df["DATE"])
transactions_df = transactions_df.sort_values(by="DATE")

- Helper functions used later in this notebook

In [None]:
def plot_graph(train_values, actual_values, predictions):
    """
    Plot training data, actual test values and predictions on one time axis.

    The function draws three lines on the current matplotlib axes:
    1. Training data values
    2. Actual test values
    3. Predicted values

    The x-axis is an integer time-step index: training points occupy steps
    0 .. len(train_values) - 1 and the test/prediction points continue from
    len(train_values) onwards, so every point is exactly one step apart and
    the two segments never share an x coordinate.

    Args:
        train_values: Array of training data values to plot
        actual_values: Array of actual test values to plot
        predictions: Array of predicted values to plot; must have the same
            length as actual_values because both share the same x positions
    """
    n_train = len(train_values)
    n_test = len(actual_values)

    # np.arange yields consecutive integer steps. np.linspace(0, n, n) would
    # space the points n / (n - 1) apart and place the last training point on
    # the same x position as the first test point.
    x_train = np.arange(n_train)
    x_test = np.arange(n_train, n_train + n_test)

    plt.plot(x_train, train_values)
    plt.plot(x_test, actual_values)
    plt.plot(x_test, predictions)
    plt.legend(["Train Data", "Actual Sales", "Predictions"])


def calculate_smape(actual_values, predictions):
    """
    Calculate Symmetric Mean Absolute Percentage Error (SMAPE) between actual and predicted values.

    Uses the definition
        100 / n * sum(2 * |pred - actual| / (|actual| + |pred|))
    which ranges from 0 (perfect forecast) to 200 (e.g. one side is zero
    while the other is not).

    Args:
        actual_values: Array-like of actual values
        predictions: Array-like of predicted values, same length as actual_values

    Returns:
        float: SMAPE score as a percentage between 0 and 200

    Raises:
        ValueError: If actual_values is empty
    """
    # Accept lists / Series as well as arrays; compare positionally.
    actual = np.asarray(actual_values, dtype=float)
    predicted = np.asarray(predictions, dtype=float)

    if actual.size == 0:
        raise ValueError("calculate_smape requires at least one value")

    numerator = 2.0 * np.abs(predicted - actual)
    denominator = np.abs(actual) + np.abs(predicted)

    # When actual and prediction are both zero the forecast is exact, so the
    # term contributes 0 instead of the NaN that 0 / 0 would produce.
    ratios = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return float(100.0 * np.mean(ratios))

### 1. Rename date and total sales columns to ds and y respectively

In [None]:
# Use the "ds" (date) and "y" (target) column names, as announced in the
# section heading above.
column_mapping = {"DATE": "ds", "TOTAL_SALES": "y"}
transactions_df = transactions_df.rename(columns=column_mapping)

### 2. Fit basic NeuralProphet model

Optimal results:
- MAPE value: 0.0978
- SMAPE value: 5.0497
- Tuning time: -
- Fitting time: 11.598s

For the optimal results of other feature combinations, refer to the Model Performance document.

In [None]:
# Baseline model: weekly seasonality only, TRAIN_SIZE lagged observations as
# input and TEST_SIZE steps forecast in one pass. MAPE and SMAPE are collected
# during fitting.
np_model = NeuralProphet(
    n_lags=TRAIN_SIZE,
    n_forecasts=TEST_SIZE,
    weekly_seasonality=True,
    yearly_seasonality=False,
    daily_seasonality=False,
    # n_changepoints=10
    collect_metrics={
        "MAPE": "MeanAbsolutePercentageError",
        "SMAPE": "SymmetricMeanAbsolutePercentageError",
    },
)

# Optional lagged regressors -- uncomment to include them. The fit/predict
# cells currently pass only the "ds" and "y" columns, so they would need to
# pass these columns as well.
# np_model.add_lagged_regressor("TOTAL_CUSTOMERS", n_lags=7)
# np_model.add_lagged_regressor("PURCHASE_AMT_PER_QTY", n_lags=7)
# np_model.add_lagged_regressor("FLAG_WEEKEND", n_lags=7)

In [None]:
# Time only the training step so that the reported "fitting time" does not
# include prediction. perf_counter is monotonic and intended for measuring
# durations.
start_time = time.perf_counter()
metrics = np_model.fit(transactions_df[["ds", "y"]], freq="D")
end_time = time.perf_counter()

# In-sample forecast over the same data the model was fitted on.
forecast = np_model.predict(transactions_df[["ds", "y"]])

In [None]:
# Report the elapsed time measured in the previous cell, then the metrics
# returned by fit().
elapsed_seconds = end_time - start_time
print(f"NeuralProphet model fitting time: {elapsed_seconds} seconds")
print(metrics)

In [None]:
# Highlight the 1-step-ahead forecast, then plot the forecast.
np_model.highlight_nth_step_ahead_of_each_forecast(1)
np_model.plot(forecast)

In [None]:
# Plot the model's trend and seasonality components.
np_model.plot_parameters(components=["trend", "seasonality"])

### 3. Predict last 14 days

In [None]:
# Model used for the hold-out evaluation: same lag/horizon setup as the basic
# model, with multiplicative weekly seasonality.
#
# growth is set to "linear" explicitly. The previous value "logistic" is not
# one of the growth modes NeuralProphet documents ("off", "linear",
# "discontinuous").
# NOTE(review): depending on the installed version an unsupported value is
# presumably either rejected or replaced by "linear" -- confirm against the
# NeuralProphet release in use.
np_model = NeuralProphet(
    # n_changepoints=10
    yearly_seasonality=False,
    weekly_seasonality=True,
    daily_seasonality=False,
    n_lags=TRAIN_SIZE,
    n_forecasts=TEST_SIZE,
    growth="linear",
    seasonality_mode="multiplicative",
    collect_metrics={
        "MAPE": "MeanAbsolutePercentageError",
        "SMAPE": "SymmetricMeanAbsolutePercentageError",
    },
)

# Optional lagged regressors -- uncomment to include them. The training data
# passed to fit() would then need to contain these columns as well.
# np_model.add_lagged_regressor("TOTAL_CUSTOMERS", n_lags=14)
# np_model.add_lagged_regressor("PURCHASE_AMT_PER_QTY", n_lags=14)
# np_model.add_lagged_regressor("FLAG_WEEKEND", n_lags=14)

In [None]:
# Hold out the final TEST_SIZE rows for evaluation and train on everything
# before them. The split is done outside the timed region.
train_data = transactions_df[["ds", "y"]].iloc[:-TEST_SIZE]

# Time only the fit; perf_counter is monotonic and intended for durations.
start_time = time.perf_counter()
np_model.fit(train_data, freq="D")
end_time = time.perf_counter()

In [None]:
# Report the elapsed time measured in the previous cell.
elapsed_seconds = end_time - start_time
print(f"NeuralProphet model fitting time: {elapsed_seconds} seconds")


In [None]:
# Build the prediction frame from the training data: TEST_SIZE future periods
# are requested and historic predictions are enabled.
df_future = np_model.make_future_dataframe(train_data, n_historic_predictions=True, periods=TEST_SIZE)
forecast = np_model.predict(df_future)
# Highlight the TEST_SIZE-step-ahead forecast, then plot the forecast.
np_model.highlight_nth_step_ahead_of_each_forecast(TEST_SIZE)
np_model.plot(forecast)

In [None]:
# Plot the model's trend and seasonality components.
np_model.plot_parameters(components=["trend", "seasonality"])

- Calculate MAPE and SMAPE for the predictions of the last 14 days

In [None]:
# Forecast column for the TEST_SIZE-step-ahead predictions. Deriving the name
# from TEST_SIZE keeps it in sync with n_forecasts and the highlighted step
# instead of hard-coding "yhat14".
horizon_column = f"yhat{TEST_SIZE}"

# NOTE(review): assumes the last TEST_SIZE rows of `forecast` line up with the
# last TEST_SIZE rows of `transactions_df` (daily data without gaps) -- confirm.
last_14_predictions = forecast[horizon_column].iloc[-TEST_SIZE:].values
last_14_val = transactions_df["y"].iloc[-TEST_SIZE:].values

mape = MAPE_metrics(last_14_val, last_14_predictions)
smape = calculate_smape(last_14_val, last_14_predictions)

print(f"MAPE value for last {TEST_SIZE} days prediction: {mape}")
print(f"SMAPE value for last {TEST_SIZE} days prediction: {smape}")

- Plot prediction vs actual graph

In [None]:
# Plot the last TRAIN_SIZE training values followed by the held-out actual
# values and their predictions.
train_values = train_data["y"].iloc[-TRAIN_SIZE:]
plot_graph(train_values, last_14_val, last_14_predictions)

In [None]:
# Close the session obtained from get_active_session() in the setup cell.
session.close()