# Databricks notebook source

## 1. Libraries

In [None]:
!pip install u8darts[all]==0.29.0
!pip install mlflow==2.11.3
!pip install holidays==0.45
!pip install numpy==1.23.5
dbutils.library.restartPython()

In [None]:
# Darts
from darts import TimeSeries
from darts.models.forecasting.lgbm import LightGBMModel
from darts.dataprocessing.transformers import Scaler
from darts import metrics

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Utilities
import pandas as pd
import numpy as np
import datetime
import mlflow
import mlflow.sklearn
from datetime import timedelta
import datetime
import holidays
from scipy import stats
from sklearn.preprocessing import PowerTransformer, MinMaxScaler
from dateutil.relativedelta import relativedelta
from tqdm import tqdm
import pickle
from statsmodels.tsa.seasonal import seasonal_decompose

# Notebook configuration
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
sns.set_theme(style="whitegrid", palette="pastel")
#import warnings
#warnings.filterwarnings('ignore')


## 2. Utility Functions

In [None]:
# Function to calculate Easter date
def calculate_easter(year):
    """Return Easter Sunday of ``year`` (Gregorian calendar) as a ``pd.Timestamp``.

    Integer-only computus (the "anonymous Gregorian" / Meeus-Jones-Butcher
    formulation); intermediate letters follow the usual naming of that
    algorithm.
    """
    a = year % 19
    b, c = divmod(year, 100)          # century and year within the century
    d, e = divmod(b, 4)
    f = (b + 8) // 25
    g = (b - f + 1) // 3
    h = (19 * a + b - d - g + 15) % 30
    i, k = divmod(c, 4)
    l = (32 + 2 * e + 2 * i - h - k) % 7
    m = (a + 11 * h + 22 * l) // 451

    # The same quantity encodes both the month (quotient) and the day (remainder + 1)
    month, day_minus_one = divmod(h + l - 7 * m + 114, 31)
    return pd.Timestamp(year, month, day_minus_one + 1)


# Function to calculate Carnival date
def calculate_carnival(year):
    """Return the Carnival date of ``year``: 47 days before Easter Sunday."""
    days_before_easter = timedelta(days=47)
    return calculate_easter(year) - days_before_easter
    

# Function to determine custom events
def custom_events(date):
    """Return the name of the commercial event that falls on ``date``, if any.

    Parameters:
    - date: object exposing ``.year`` and ``.strftime`` (e.g. ``pd.Timestamp``).

    Returns:
    - str or None: one of 'Carnival', 'Carnival Eve', 'Mothers Day',
      'Mothers Day Eve', 'Valentines Day', 'Black Friday'; ``None`` when the
      date matches none of them.

    All events are computed for ``date.year`` and matched on month/day, so a
    time-of-day component on ``date`` does not prevent a match.
    """
    year = date.year
    month_day = date.strftime('%m-%d')

    carnival = calculate_carnival(year)

    # Mother's Day: second Sunday of May (one week after May 1st, then the
    # first Sunday on or after that day)
    mothers_day = pd.Timestamp(year, 5, 1) + pd.DateOffset(weekday=6, weeks=1)

    # Black Friday: the day after the fourth Thursday of November. This is NOT
    # always the fourth Friday: when November starts on a Friday (2019, 2024)
    # the fourth Friday is the 22nd while Black Friday is the 29th.
    november_first = pd.Timestamp(year, 11, 1)
    days_to_first_thursday = (3 - november_first.weekday()) % 7
    black_friday = november_first + timedelta(days=days_to_first_thursday + 21 + 1)

    # Ordered by priority: the first matching entry wins
    events = (
        (carnival, 'Carnival'),
        (carnival - timedelta(days=1), 'Carnival Eve'),
        (mothers_day, 'Mothers Day'),
        (mothers_day - timedelta(days=1), 'Mothers Day Eve'),
        (pd.Timestamp(year, 6, 12), 'Valentines Day'),  # June 12th in Brazil
        (black_friday, 'Black Friday'),
    )
    for event_date, label in events:
        if month_day == event_date.strftime('%m-%d'):
            return label
    return None


# Function to include events in holiday column
def include_events(row):
    """Return the row's 'event', falling back to ``custom_events`` when it is missing.

    The row label (``row.name``) is the date passed to ``custom_events``.
    """
    existing_event = row['event']
    return custom_events(row.name) if pd.isna(existing_event) else existing_event
    

# Function to replace values
def replace_event(row, events=None):
    """Collapse every event that is not of interest into 'Normal Day'.

    Parameters:
    - row: mapping/Series with an 'event' entry.
    - events (collection, optional): event names to keep. When omitted, the
      notebook-level ``events_to_consider`` variable is used, which keeps the
      function usable as ``df.apply(replace_event, axis=1)``.

    Returns:
    - str: ``row['event']`` if it is one of the kept events, else 'Normal Day'.
    """
    if events is None:
        # Backward-compatible fallback to the notebook-level configuration
        events = events_to_consider
    event = row['event']
    return event if event in events else 'Normal Day'

def replace_or_add(lst, index, new_value):
    """Set ``lst[index]`` to ``new_value`` in place, appending when ``index`` is past the end."""
    if index >= len(lst):
        # Position does not exist yet: grow the list by one element
        lst.append(new_value)
        return
    # Position exists: overwrite it
    lst[index] = new_value


# Function to remove outliers from metrics
def remove_outliers_zscore(data_list, threshold=3):
    """Drop missing/non-finite values and z-score outliers from a list of metrics.

    Parameters:
    - data_list (iterable of numbers): values to filter; may contain ``None``,
      NaN or +/-inf entries.
    - threshold (float): a value is kept when its absolute z-score is strictly
      below this threshold.

    Returns:
    - list of float: the remaining values, in their original order. Empty when
      no finite value is left.
    """
    values = np.asarray([x for x in data_list if x is not None], dtype=float)
    # NaN is detected by value (an identity test against np.nan misses
    # float('nan')); +/-inf is dropped too because it would turn the mean and
    # every z-score into NaN and wipe out the whole list.
    values = values[np.isfinite(values)]

    if values.size == 0:
        return []

    spread = values.std()
    if spread == 0:
        # All values identical: nothing can be an outlier (and 0/0 would be NaN)
        return values.tolist()

    z_scores = np.abs((values - values.mean()) / spread)
    return values[z_scores < threshold].tolist()

    
def _build_scaler(scale_method):
    """Wrap the sklearn scaler selected by ``scale_method`` in a darts ``Scaler``."""
    base_scaler = MinMaxScaler() if scale_method == 'minmax' else PowerTransformer()
    return Scaler(base_scaler)


def _stack_covariates(current, extra):
    """Stack ``extra`` onto ``current``; return ``extra`` alone when nothing is accumulated yet."""
    return extra if current is None else current.stack(extra)


def prepare_input(df, 
                  target_col='NET_VALUE',
                  numeric_features=None, 
                  use_event_flag=False, 
                  events_to_consider=None,
                  scale_trgt=False,
                  scale_cov=False,
                  scale_method=None,
                  dummy_day_of_week=False):
    '''
    Function to prepare model input data.

    Parameters:
    - df (pandas.DataFrame): DataFrame containing the data used. It is not modified.
    - target_col (str): Name of the target series column.
    - numeric_features (list): List with numerical feature columns to be used.
    - use_event_flag (boolean): Whether to use the event flag (reads column 'event_flag').
    - events_to_consider (list): List with event names to consider if used as variables
      (reads column 'event'; every other event is grouped as 'Normal Day').
    - scale_trgt (boolean): Whether to scale the target variable.
    - scale_cov (boolean): Whether to scale the covariates.
    - scale_method (str): Name of the method to be used when scaling data
      ('minmax'; any other value selects the power transformer).
    - dummy_day_of_week (boolean): Whether to add one-hot columns built from column 'day_of_week'.

    Returns:
    - list with the following items, by position:
        - [0] series (TimeSeries): target series (scaled when scale_trgt is True).
        - [1] series_cov (TimeSeries or None): covariate series. Present when any covariate
          was requested, or as a None placeholder when only the target scaler follows.
        - [2] transformer_trgt (darts.dataprocessing.transformers.scaler.Scaler): target series
          scaler for scale reversal. Present only when scale_trgt is True.
    '''
    # Main (target) series
    series = TimeSeries.from_dataframe(df=df, value_cols=target_col, freq='D')

    # Covariates are accumulated component by component
    series_cov = None

    # Numerical variables
    if numeric_features:
        series_cov = TimeSeries.from_dataframe(df=df, value_cols=numeric_features, freq='D')

    # Binary event flag
    if use_event_flag:
        series_event_flag = TimeSeries.from_dataframe(df=df, value_cols='event_flag', freq='D')
        series_cov = _stack_covariates(series_cov, series_event_flag)

    # One-hot encoding of the events to consider
    if events_to_consider:
        # Uses the function argument (not a notebook-level global) and works on a
        # derived Series so the caller's DataFrame is left untouched
        event_labels = df['event'].where(df['event'].isin(events_to_consider), 'Normal Day')
        df_dummy = pd.get_dummies(event_labels, prefix='event') * 1
        series_dummy = TimeSeries.from_dataframe(
            df=df_dummy,
            value_cols=df_dummy.columns.tolist(),
            freq='D')
        series_cov = _stack_covariates(series_cov, series_dummy)

    # One-hot encoding of the day of week
    if dummy_day_of_week:
        df_dummy_dayofweek = pd.get_dummies(df['day_of_week'], prefix='day_of_week') * 1
        series_dummy_dayofweek = TimeSeries.from_dataframe(
            df=df_dummy_dayofweek,
            value_cols=df_dummy_dayofweek.columns.tolist(),
            freq='D')
        series_cov = _stack_covariates(series_cov, series_dummy_dayofweek)

    # Optional scaling of the target; the fitted transformer is returned for scale reversal
    transformer_trgt = None
    if scale_trgt:
        transformer_trgt = _build_scaler(scale_method)
        series = transformer_trgt.fit_transform(series)

    # Optional scaling of the covariates (the covariate transformer is not returned)
    if scale_cov and series_cov is not None:
        series_cov = _build_scaler(scale_method).fit_transform(series_cov)

    # Assemble the positional result. The covariate slot is kept (as None) whenever the
    # transformer follows it, so the transformer is always found at index 2.
    return_list = [series]
    if series_cov is not None or transformer_trgt is not None:
        return_list.append(series_cov)
    if transformer_trgt is not None:
        return_list.append(transformer_trgt)

    return return_list


# Function to create the model
def create_model(use_covariates, lags, output_chunk_length, random_seed, n_estimators, multi_models, lags_past_covariates, lags_future_covariates):
    """Build a darts ``LightGBMModel`` with or without covariate lags.

    When ``use_covariates`` is truthy the past/future covariate lags are passed
    to the model; otherwise they are ignored.
    """
    model_kwargs = {
        'lags': lags,
        'output_chunk_length': output_chunk_length,
        'random_state': random_seed,
        'n_estimators': n_estimators,
        'multi_models': multi_models,
        'force_row_wise': False,
        # NOTE(review): column-wise histogram building is forced only for the
        # covariate-free model; confirm this asymmetry is intentional.
        'force_col_wise': not use_covariates,
    }
    if use_covariates:
        model_kwargs['lags_past_covariates'] = lags_past_covariates
        model_kwargs['lags_future_covariates'] = lags_future_covariates

    return LightGBMModel(**model_kwargs)


# Transform negative values to zero
def replace_negatives_with_zero(x):
    """Return ``x`` with every value below zero replaced by zero (element-wise maximum with 0)."""
    return np.maximum(x, 0)


# Function to calculate WAPE
def calculate_wape(y, yhat):
    """
    Calculates Weighted Absolute Percentage Error (WAPE), in percent.

    Args:
        y: Actual values. Any sequence supporting ``len()`` and integer
           indexing whose items support ``-``, ``abs``, ``+`` and ``/``.
        yhat: Predicted values, indexable the same way as ``y``.

    Returns:
        WAPE value, ``sum(|y - yhat|) / sum(y) * 100``, with the same type the
        arithmetic on the items produces (a float for plain numbers).
        ``np.inf`` when it cannot be computed (e.g. the actual values sum to
        zero, or ``yhat`` is shorter than ``y``).

    Description:
    WAPE weights each absolute percentage error by the actual value itself, so
    errors in high-volume periods (seasonal peaks) weigh more than errors in
    low-volume periods. Because the weight equals the actual value, each
    weighted term ``|y - yhat| / y * y`` reduces to the absolute error
    ``|y - yhat|``. The absolute errors are therefore summed directly, which
    gives the same result without dividing by each individual actual value, so
    a single zero actual no longer invalidates the whole metric.
    """
    total_abs_error = 0
    total_actual = 0

    try:
        # Index-based loop on purpose: it only requires len() and integer
        # indexing, which is all the original implementation relied on.
        for i in range(len(y)):
            total_abs_error += abs(y[i] - yhat[i])
            total_actual += y[i]

        wape = total_abs_error / total_actual * 100
    except (ArithmeticError, IndexError, KeyError, TypeError, ValueError):
        # Best effort: an uncomputable metric is reported as infinite
        wape = np.inf

    return wape


# Function to perform predictions and calculate metrics
def predict_and_evaluate(model, serie_tgrt, serie_tgrt_test, series_cov, scale_trgt, transformer_trgt, target_col):
    """Forecast each day of the test series one step ahead and collect error metrics.

    For every distinct day in ``serie_tgrt_test`` the model predicts that day
    from ``serie_tgrt`` sliced up to the previous day. Predictions are
    optionally inverse-scaled, clipped at zero and appended to a DataFrame.
    Metrics are computed only for days whose weekday is Monday to Friday.

    Parameters:
    - model: fitted forecasting model exposing ``predict``, ``uses_past_covariates``
      and ``uses_future_covariates``.
    - serie_tgrt (TimeSeries): target series used as history.
    - serie_tgrt_test (TimeSeries): target series holding the days to evaluate.
    - series_cov (TimeSeries): covariates, passed as past covariates; the columns
      listed in the notebook-level ``future_variables`` are passed as future covariates.
    - scale_trgt (boolean): whether predictions/actuals must be inverse-transformed.
    - transformer_trgt: scaler used for the inverse transform when ``scale_trgt`` is True.
    - target_col (str): target name used to build the quantile component names
      (``<target_col>_q0.05`` / ``_q0.50`` / ``_q0.95``).

    Returns:
    - tuple ``(df_preds, metrics_lists)``: the predictions DataFrame and a dict
      of metric name -> list of per-day values.
    """
    # NOTE(review): column 'NET_VALUE_real' is declared here but never filled below
    df_preds = pd.DataFrame(columns=['NET_VALUE_predicted_05', 'NET_VALUE_predicted', 'NET_VALUE_predicted_95', 'NET_VALUE_real'])
    metrics_lists = {
        'mape': [], 'smape': [], 'ope': [], 'r2': [], 'rmse': [], 'wape': []
    }

    unique_dates = serie_tgrt_test.time_index.strftime('%Y-%m-%d').unique()
    min_time = serie_tgrt.time_index.min()

    progress_bar_general = tqdm(unique_dates, desc='Processing')
    for day in progress_bar_general:
        # History ends the day before the evaluated day; the evaluated window is that single day
        start_time = pd.Timestamp(day)
        start_real = pd.Timestamp(day) - pd.Timedelta(days=1)
        end_time = pd.Timestamp(day)

        serie_tgrt_filt = serie_tgrt.slice(min_time, start_real)
        serie_tgrt_test_filt = serie_tgrt_test.slice(start_time, end_time)
        current_series = serie_tgrt_filt
        predictions_05, predictions_median, predictions_95 = [], [], []

        if model.uses_past_covariates or model.uses_future_covariates:

            # Predicts
            # `future_variables` is read from the notebook-level scope, it is not a parameter
            pred = model.predict(
                series=current_series,
                past_covariates=series_cov,
                future_covariates=series_cov[future_variables],
                n=1,
                predict_likelihood_parameters=True)
            predictions_05.append(pred[f'{target_col}_q0.05'].values().item())
            predictions_median.append(pred[f'{target_col}_q0.50'].values().item())
            predictions_95.append(pred[f'{target_col}_q0.95'].values().item())
            current_series = current_series.append(pred[f'{target_col}_q0.50'])
        else:
            # NOTE(review): only the median is produced here; predictions_05 and
            # predictions_95 stay empty, so building their series below presumably
            # fails on a length mismatch - confirm before using this branch
            pred = model.predict(series=serie_tgrt_filt, n=len(serie_tgrt_test_filt))
            predictions_median = pred.values().flatten()

        pred_05_series = TimeSeries.from_times_and_values(serie_tgrt_test_filt.time_index, np.array(predictions_05))
        pred_series = TimeSeries.from_times_and_values(serie_tgrt_test_filt.time_index, np.array(predictions_median))
        pred_95_series = TimeSeries.from_times_and_values(serie_tgrt_test_filt.time_index, np.array(predictions_95))

        # Back to the original scale (predictions and actuals) when the target was scaled
        if scale_trgt:
            pred_05_series = transformer_trgt.inverse_transform(pred_05_series)
            pred_series = transformer_trgt.inverse_transform(pred_series)
            pred_95_series = transformer_trgt.inverse_transform(pred_95_series)
            serie_tgrt_test_filt = transformer_trgt.inverse_transform(serie_tgrt_test_filt)

        # Negative forecasts are clipped to zero
        pred_05_series = pred_05_series.map(replace_negatives_with_zero)
        pred_series = pred_series.map(replace_negatives_with_zero)
        pred_95_series = pred_95_series.map(replace_negatives_with_zero)

        # Metrics are only collected for Monday-Friday (weekday 0-4)
        if start_time.weekday() < 5:
            df_trgt = pred_series.pd_dataframe()
            # NOTE(review): this comparison yields an array; it is only usable as a
            # condition while the window holds a single distinct weekday
            if df_trgt.index.weekday.unique() < 5:
                # Both series are shifted by +1 before scoring - presumably to avoid
                # division by zero on zero-valued actuals; confirm
                mape = metrics.mape(serie_tgrt_test_filt + 1, pred_series + 1)
                smape = metrics.smape(serie_tgrt_test_filt + 1, pred_series + 1)
                ope = metrics.ope(serie_tgrt_test_filt + 1, pred_series + 1)
                r2 = metrics.r2_score(serie_tgrt_test_filt + 1, pred_series + 1)
                rmse = metrics.rmse(serie_tgrt_test_filt + 1, pred_series + 1)
                wape = calculate_wape(serie_tgrt_test_filt + 1, pred_series + 1)

                metrics_lists['mape'].append(mape)
                metrics_lists['smape'].append(smape)
                metrics_lists['ope'].append(ope)
                metrics_lists['r2'].append(r2)
                metrics_lists['rmse'].append(rmse)
                # calculate_wape is fed TimeSeries objects, so its result is unwrapped to a scalar here
                # NOTE(review): if calculate_wape falls back to np.inf this call has no .values() - confirm
                metrics_lists['wape'].append(wape.values().squeeze().item())

        # Stacking three single-component series yields components '0', '0_1', '0_1_1', renamed here
        df_preds = pd.concat([df_preds, 
                              pred_05_series.stack(pred_series).stack(pred_95_series).pd_dataframe().rename(
                                  columns={'0': 'NET_VALUE_predicted_05', '0_1': 'NET_VALUE_predicted', '0_1_1': 'NET_VALUE_predicted_95'})])

    return df_preds, metrics_lists


## 3. Input Data and Registered Model Import

In [None]:
# Most recent date that already has a forecast stored in the output table
str_select_dt_max_predicted = '''
                                SELECT max(dt)
                                FROM analytics.refined_sales_orders_forecast
                                '''

# Single-row result: take that row and format its value as 'YYYY-MM-DD'
df_dt_max_predicted = spark.sql(str_select_dt_max_predicted).toPandas()
first_row = df_dt_max_predicted.iloc[0]
dt_max_predicted = first_row.dt.strftime('%Y-%m-%d').values[0]
dt_max_predicted

In [None]:
# Sales series, read in full from the aggregated sales table
str_select_sales = '''
                        SELECT *
                        FROM analytics.refined_sales_orders_agg
                        '''

# SYSTEM_TIMESTAMP becomes the (chronologically sorted) DataFrame index
df_series_sales = (
    spark.sql(str_select_sales)
    .toPandas()
    .set_index('SYSTEM_TIMESTAMP')
    .sort_index()
)

# REMOVE THIS LINE! Created only to generate the first data mass
#df_series_sales = df_series_sales[df_series_sales.index < dt_max_predicted]

# Latest timestamp present in the data, used later as the cut-off for the last full period
last_full_period = df_series_sales.index.max()
last_full_period

In [None]:
# Inspecting columns, dtypes and non-null counts of the loaded sales data
df_series_sales.info()

In [None]:
# Registered model, loaded from the MLflow registry at the configured stage
model_stage = 'staging'
mlflow_model_name = f'models:/LightGBM_forecast_sales_daily/{model_stage}'
model = mlflow.sklearn.load_model(mlflow_model_name)

# Target scaler saved with the project artifacts
# NOTE(review): pickle executes code on load - only load artifacts from a trusted location
artifacts_root = '/Workspace/Repos/DataScience/FORECAST_PROJECT/src/artifacts/'
transformer_path = f'{artifacts_root}transformer_trgt.pkl'
with open(transformer_path, 'rb') as f:
    transformer_trgt = pickle.load(f)

In [None]:
# Inspecting the parameters stored on the loaded model
model.model_params


## 4. Data Preparation

In [None]:
# Converts granularity to Daily (sums every column per calendar day)
df_series_sales_resampled = df_series_sales.resample('D').sum()

# Filtering up to the last full period
# NOTE(review): this keeps every day whose midnight is before the latest raw timestamp;
# if the raw data is intraday, the current (partial) day is kept - confirm
df_series_sales_resampled = df_series_sales_resampled[df_series_sales_resampled.index < last_full_period]

# Decomposing the series for residual outlier verification
# NOTE(review): period=8 on daily data - a weekly cycle would be 7; confirm
result = seasonal_decompose(df_series_sales_resampled['NET_VALUE'], model='additive', period=8)
trend = result.trend
seasonal = result.seasonal
residual = result.resid

# Calculates residual z-scores (edge NaNs produced by the decomposition are excluded)
residual_valid = residual.dropna()
z_scores = stats.zscore(residual_valid)

# Gets outlier indices
zscore_threshold = 3
outliers = np.abs(z_scores) > zscore_threshold
outliers_index = residual_valid.index[outliers]

# Residual interpolation at outlier points
residual_adjusted = residual.copy()
residual_adjusted[outliers_index] = np.nan
residual_adjusted = residual_adjusted.interpolate()
# .bfill()/.ffill() replace fillna(method=...), which is deprecated in recent pandas
residual_adjusted = residual_adjusted.bfill().ffill()

# Reconstruction of adjusted time series; where the trend is undefined (series edges)
# the original value is used instead
adjusted_time_series = trend + seasonal + residual_adjusted
df_series_sales_resampled['NET_VALUE_clean'] = adjusted_time_series
df_series_sales_resampled['NET_VALUE_clean'] = df_series_sales_resampled['NET_VALUE_clean'].fillna(df_series_sales_resampled['NET_VALUE'])

# Creating calendar variables
df_series_sales_resampled['year'] = df_series_sales_resampled.index.year
df_series_sales_resampled['month'] = df_series_sales_resampled.index.month
df_series_sales_resampled['week_of_year'] = df_series_sales_resampled.index.isocalendar().week
df_series_sales_resampled['day_of_week'] = df_series_sales_resampled.index.day_of_week #  Monday=0 and Sunday=6
df_series_sales_resampled['day_of_month'] = df_series_sales_resampled.index.day

# Calculates zscore
df_series_sales_resampled['zscore'] = stats.zscore(df_series_sales_resampled[['NET_VALUE']])

# Holidays to consider if dummies are created
events_to_consider = None

# Numerical variables
numeric_features = ['day_of_week', 'day_of_month', 'month']
future_variables = ['day_of_week', 'day_of_month', 'month']

# Generate day of week dummy
dummy_day_of_week = False

# Use event binary flag or not
use_event_flag = False

# Scale target series or not - Marked as False to use the loaded scaler
scale_trgt = False

# Scale covariate series or not
scale_cov = False

# Scaling method (minmax or power)
scale_method = 'power'

# Target series column to predict
# NOTE(review): 'NET_VALUE_clean' (outlier-adjusted) is computed above but the raw column is used - confirm
target_col = 'NET_VALUE'

# Generating series
return_list = prepare_input(df=df_series_sales_resampled, 
                            target_col=target_col,
                            numeric_features=numeric_features, 
                            use_event_flag=use_event_flag, 
                            events_to_consider=events_to_consider,
                            scale_trgt=scale_trgt,
                            scale_cov=scale_cov,
                            scale_method=scale_method,
                            dummy_day_of_week=dummy_day_of_week)

# Extracting objects from return list
serie_tgrt = return_list[0]
# Covariates exist whenever ANY covariate source was requested (day-of-week dummies included)
if numeric_features or use_event_flag or events_to_consider or dummy_day_of_week:
    series_cov = return_list[1]
# When the target was scaled, its transformer is always the last item of the list
if scale_trgt:
    transformer_trgt = return_list[-1]

In [None]:
# Scaling input data with the loaded scaler.
# Skipped when prepare_input already scaled the target (scale_trgt=True), otherwise
# the series would be transformed twice.
if not scale_trgt:
    serie_tgrt = transformer_trgt.transform(serie_tgrt)

In [None]:
# Inspecting the last 100 points of the input series
serie_tgrt[-100:].plot()

In [None]:
# Forecast horizon, in days (the series is daily)
horizon_predict = 1

# Calendar covariates from the last observed day up to 20 days beyond the horizon
last_observed = serie_tgrt.time_index.max()
future_index = pd.date_range(start=last_observed,
                             end=last_observed + timedelta(days=horizon_predict + 20),
                             freq='D')
df_future_vars = pd.DataFrame(index=future_index)

df_future_vars['day_of_week'] = df_future_vars.index.day_of_week #  Monday=0 and Sunday=6
df_future_vars['month'] = df_future_vars.index.month
df_future_vars['day_of_month'] = df_future_vars.index.day

# Only the columns listed in future_variables are kept, in that order
future_covariates = TimeSeries.from_dataframe(df_future_vars,
                                              value_cols=future_variables)

future_covariates.time_index

In [None]:
def calculate_zscore(value, mean, std_dev):
    """
    Standardizes a value: how many standard deviations it lies from the mean.

    Parameters:
    value (float): The value to standardize.
    mean (float): The mean of the reference dataset.
    std_dev (float): The standard deviation of the reference dataset.

    Returns:
    float: ``(value - mean) / std_dev``.

    Raises:
    ValueError: If ``std_dev`` is zero.
    """
    if std_dev == 0:
        raise ValueError("Standard deviation cannot be zero.")
    return (value - mean) / std_dev

# Mean and standard deviation of the observed NET_VALUE, kept so that z-scores of
# predicted steps can be computed against the original distribution
# NOTE(review): neither value is used further down in this notebook - confirm they are still needed
net_value_observed = df_series_sales_resampled['NET_VALUE']
mean = net_value_observed.mean()
std_dev = net_value_observed.std()

In [None]:
# From the last date present in loaded data, predict defined horizon
last_observed = serie_tgrt.time_index.max()
unique_dates = pd.date_range(start=last_observed,
                             end=last_observed,
                             freq='D').strftime('%Y-%m-%d').unique()

# Rolling state is initialised ONCE, before the loop: each iteration extends the
# target and covariate series with the step just predicted, so that iterating over
# more than one date forecasts consecutive steps instead of repeating the first one.
current_serie_tgrt = serie_tgrt.copy()
current_series_cov = series_cov.copy()
preds = None

progress_bar_general = tqdm(unique_dates, desc='Processing')
for day in progress_bar_general:

    # Updates progress bar description for series
    progress_bar_general.set_description(f'Processing day {day}')

    # Performs point prediction (one step ahead, returning the quantile components)
    pred = model.predict(
                            series=current_serie_tgrt,
                            past_covariates=current_series_cov,
                            future_covariates=future_covariates,
                            n=1,
                            predict_likelihood_parameters=True)

    # Updating target series with predicted median
    current_serie_tgrt = current_serie_tgrt.append(pred[f'{target_col}_q0.50'])

    # First step starts the accumulated forecast; later steps are appended to it
    preds = pred.copy() if preds is None else preds.append(pred)

    # Extracting calendar information of the step just predicted
    predicted_timestamp = current_serie_tgrt.time_index[-1]

    # Updating covariate series with the calendar values of the predicted step
    new_row = pd.DataFrame({
                            'day_of_week': [predicted_timestamp.dayofweek],
                            'day_of_month': [predicted_timestamp.day],
                            'month': [predicted_timestamp.month]
                            }, 
                            index=[predicted_timestamp])
    df_current_series_cov = pd.concat([current_series_cov.pd_dataframe(), new_row])
    current_series_cov = TimeSeries.from_dataframe(df_current_series_cov)

In [None]:
# Inspecting the prediction while it is still on the scaled target's scale
preds.plot()

In [None]:
# Timestamp of the last predicted point
preds.time_index.max()

In [None]:
# Creating dataframe with predictions
df_preds = preds.pd_dataframe()

# Explicit mapping from each predicted quantile component to its output column, so
# the labelling does not depend on the order in which the components come back
quantile_columns = {
    f'{target_col}_q0.05': 'forecast_NET_VALUE_quartile_0_05',
    f'{target_col}_q0.50': 'forecast_NET_VALUE_quartile_0_5',
    f'{target_col}_q0.95': 'forecast_NET_VALUE_quartile_0_95',
}

# Reversing prediction scaling and clipping negative values to zero, one quantile at a time
for quantile_col in quantile_columns:
    scaled_quantile = TimeSeries.from_dataframe(df_preds[[quantile_col]])
    restored_quantile = transformer_trgt.inverse_transform(scaled_quantile).map(replace_negatives_with_zero)
    df_preds[quantile_col] = restored_quantile.values().squeeze()

# Adjusting columns (only the three quantiles are kept, under their output names)
df_preds = df_preds[list(quantile_columns)].rename(columns=quantile_columns)

# Adjusting to integer values
forecast_columns = df_preds.columns.tolist()
df_preds[forecast_columns] = df_preds[forecast_columns].round(0).astype(int)

# Including prediction timestamp (wall-clock time of this run, formatted as text)
#dt_predict = pd.Timestamp((datetime.datetime.now() - timedelta(hours=3)).strftime('%Y-%m-%d %H:%M:%S'))
#dt_predict_start = (dt_predict + pd.Timedelta(minutes=30)).floor('D')
df_preds['dt_predict'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
df_preds.sort_index(inplace=True)

In [None]:
# Rendering the final predictions (index reset so the timestamp shows up as a column)
display(df_preds.reset_index())

In [None]:
# Inspecting columns, dtypes and non-null counts of the final data
df_preds.info()

In [None]:
# Saving data for dates not yet predicted: only rows after the latest stored date are written
df_new_preds = df_preds[df_preds.index > dt_max_predicted]

if len(df_new_preds) == 0:
    print('No new predictions!')
else:
    # The timestamp index becomes the 'dt' column of the output table
    df_spark = spark.createDataFrame(df_new_preds.reset_index().rename(columns={'SYSTEM_TIMESTAMP': 'dt'}))
    mode = 'append' # overwrite or append
    overwriteSchema = 'False' # True or False
    table_writer = df_spark.write.option("overwriteSchema", overwriteSchema)
    table_writer.saveAsTable('analytics.refined_sales_orders_forecast',
                             format='delta',
                             mode=mode,
                             path='/dbfs/mnt/datalake/datascience/raw/forecast_sales/output/sales_orders_forecast')
    print('Prediction registered in table!')