# Cortex Time Series Forecasting model
Overall Process:
- Test simple time series forecasting using Snowflake Cortex forecast model

Note:
- If you want to add additional features to this model, you must also supply the future known values of those features in order to generate a forecast (for example: holidays, weekends, etc.)

### 1. Create simple forecast

In [None]:
-- Train model
-- Fit on every row except the most recent 14 (by date), which are held out for evaluation.
CREATE OR REPLACE SNOWFLAKE.ML.FORECAST simple_forecast_model(
    INPUT_DATA => TABLE(
        WITH numbered_sales AS (
            SELECT
                date,
                total_sales,
                ROW_NUMBER() OVER (ORDER BY date) AS seq,
                COUNT(*) OVER () AS total_rows
            FROM store_2_preprocessed_transactions
        )
        SELECT date, total_sales
        FROM numbered_sales
        WHERE seq <= total_rows - 14
    ),
    TIMESTAMP_COLNAME => 'date',
    TARGET_COLNAME => 'total_sales'
);

In [None]:
-- Generate forecast using model
-- Forecast the 14 periods that follow the training data, using the model
-- trained above (simple_forecast_model), and keep the output for analysis.
CREATE OR REPLACE TEMPORARY TABLE simple_cortex_result AS
SELECT * FROM TABLE(
    simple_forecast_model!FORECAST(FORECASTING_PERIODS => 14)
);

In [None]:
-- Preview the forecast rows stored in simple_cortex_result.
SELECT * FROM simple_cortex_result;

### 2. Analyse cortex results

Packages: 
1. matplotlib
2. scikit-learn
3. pandas
4. numpy

Optimal results:
- MAPE value: 0.0449
- SMAPE value: 4.594
- Tuning time: 46s
- Fitting time: 16s

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_percentage_error as MAPE_metrics

# Evaluation horizon (number of held-out rows) and a training window derived from it.
TEST_SIZE = 14
TRAIN_SIZE = 4 * TEST_SIZE

# Obtain the active session and point it at the working database and schema.
# NOTE(review): get_active_session is not imported in the visible cells - confirm it is in scope.
session = get_active_session()
session.use_database("ml")
session.use_schema("retail_store")

# Load the stored forecast into pandas and order it chronologically by TS.
data = session.table("simple_cortex_result").to_pandas()
data["TS"] = pd.to_datetime(data["TS"])
data = data.sort_values(by="TS", ignore_index=True)


In [None]:
def plot_graph(actual_values, predictions):
    """
    Plot a graph showing actual values and predictions.

    The function plots two lines:
    1. Actual test values
    2. Predicted values

    The x-axis represents time steps (0, 1, 2, ...) and the y-axis represents
    the values.

    Args:
        actual_values: Array of actual test values to plot
        predictions: Array of predicted values to plot; must have the same
            length as actual_values
    """
    # Integer time-step positions, one per observation.
    x = np.arange(len(actual_values))

    plt.plot(x, actual_values)
    plt.plot(x, predictions)
    plt.legend(["Actual Sales", "Predictions"])
    plt.show()


def calculate_smape(actual_values, predictions):
    """
    Calculate Symmetric Mean Absolute Percentage Error (SMAPE) between actual and predicted values.

    Each term is 2 * |prediction - actual| / (|actual| + |prediction|). A term
    whose actual and predicted values are both zero is a perfect prediction and
    contributes 0 instead of producing a division by zero (NaN).

    Args:
        actual_values: Array-like of actual values
        predictions: Array-like of predicted values

    Returns:
        float: SMAPE score as a percentage between 0 and 200

    Raises:
        ZeroDivisionError: If actual_values is empty
    """
    # Accept plain sequences as well as numpy arrays.
    actual = np.asarray(actual_values, dtype=float)
    predicted = np.asarray(predictions, dtype=float)

    numerator = 2 * np.abs(predicted - actual)
    denominator = np.abs(actual) + np.abs(predicted)

    # Only divide where the denominator is non-zero; the remaining terms stay 0.
    ratios = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return float(100 / len(actual) * np.sum(ratios))

In [None]:
# Calculate MAPE and SMAPE metrics
# Ground truth: the final TEST_SIZE TOTAL_SALES values once the rows are in DATE order.
val_data = (
    session.table("store_2_preprocessed_transactions")
    .to_pandas()
    .sort_values(by="DATE", ignore_index=True)["TOTAL_SALES"]
    .iloc[-TEST_SIZE:]
    .values
)

# Forecast values for the same number of trailing rows.
predictions = data["FORECAST"].iloc[-TEST_SIZE:].values

mape = MAPE_metrics(val_data, predictions)
smape = calculate_smape(val_data, predictions)

print(f"MAPE value for last 14 days prediction: {mape}")
print(f"SMAPE value for last 14 days prediction: {smape}")

In [None]:
# Plot the held-out actual sales against the forecast values computed above.
plot_graph(val_data, predictions)

### 3. Forecast model with exogenous variable (flag_weekend)

- Only the flag_weekend feature can be included, because predictions require the future values of every exogenous variable.

Optimal results:
- MAPE value: 0.0449
- SMAPE value: 4.602
- Tuning time: 46s
- Fitting time: 16s

Analysis: Results are similar to those of the Cortex forecasting model trained on sales data only. The Cortex forecasting model may already have taken the weekday vs. weekend effect into account based on the input dates.

In [None]:
-- Train model
-- Fit on every row except the most recent 14 (by date); flag_weekend is passed
-- alongside the timestamp and target columns.
CREATE OR REPLACE SNOWFLAKE.ML.FORECAST feature_forecast_model(
    INPUT_DATA => TABLE(
        WITH numbered_sales AS (
            SELECT
                date,
                total_sales,
                flag_weekend,
                ROW_NUMBER() OVER (ORDER BY date) AS seq,
                COUNT(*) OVER () AS total_rows
            FROM store_2_preprocessed_transactions
        )
        SELECT date, total_sales, flag_weekend
        FROM numbered_sales
        WHERE seq <= total_rows - 14
    ),
    TIMESTAMP_COLNAME => 'date',
    TARGET_COLNAME => 'total_sales'
);

- Create a view for the future features table

In [None]:
-- Hold-out slice: the last 14 rows (by date), i.e. the rows excluded from training.
-- NOTE(review): total_sales (the target) is also selected here - confirm the
-- forecast call expects it in its input.
CREATE OR REPLACE VIEW future_features_view AS
WITH ordered_data AS(
    SELECT date, total_sales, flag_weekend,
        ROW_NUMBER() OVER (ORDER BY date) AS row_num
    FROM store_2_preprocessed_transactions
)
SELECT date, total_sales, flag_weekend
FROM ordered_data
-- row_num is an ordinary CTE column in this query block, so filter with WHERE
-- (QUALIFY is meant for window-function predicates); this mirrors the training split.
WHERE row_num > (SELECT COUNT(*) - 14 FROM store_2_preprocessed_transactions)
ORDER BY date;


- Generate forecast

In [None]:
-- Generate forecast using model
-- The rows to forecast are read from future_features_view, with 'date' as the
-- timestamp column; the output is stored in a temporary table for analysis.
CREATE OR REPLACE TEMPORARY TABLE feature_cortex_result AS
SELECT * FROM TABLE(
    feature_forecast_model!FORECAST(
        INPUT_DATA => TABLE(future_features_view),
        TIMESTAMP_COLNAME => 'date'
    )
);

In [None]:
-- Preview the forecast rows stored in feature_cortex_result.
SELECT * FROM feature_cortex_result;

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_percentage_error as MAPE_metrics

# Evaluation horizon (number of held-out rows) and a training window derived from it.
TEST_SIZE = 14
TRAIN_SIZE = 4 * TEST_SIZE

# Obtain the active session and point it at the working database and schema.
# NOTE(review): get_active_session is not imported in the visible cells - confirm it is in scope.
session = get_active_session()
session.use_database("ml")
session.use_schema("retail_store")

# Load the stored forecast into pandas and order it chronologically by TS.
data = session.table("feature_cortex_result").to_pandas()
data["TS"] = pd.to_datetime(data["TS"])
data = data.sort_values(by="TS", ignore_index=True)

In [None]:
# Ground truth: the final TEST_SIZE TOTAL_SALES values once the rows are in DATE order.
val_data = (
    session.table("store_2_preprocessed_transactions")
    .to_pandas()
    .sort_values(by="DATE", ignore_index=True)["TOTAL_SALES"]
    .iloc[-TEST_SIZE:]
    .values
)

# Forecast values for the same number of trailing rows.
predictions = data["FORECAST"].iloc[-TEST_SIZE:].values

mape = MAPE_metrics(val_data, predictions)
smape = calculate_smape(val_data, predictions)

print(f"MAPE value for last 14 days prediction: {mape}")
print(f"SMAPE value for last 14 days prediction: {smape}")

In [None]:
# Plot the held-out actual sales against the forecast values computed above.
plot_graph(val_data, predictions)

In [None]:
# Close the session now that the analysis cells have run.
session.close()