# LSTM model

Overall Process:
- LSTM model - Sales & Flag Weekend features
- LSTM/CNN model - Sales & Flag Weekend features
- LSTM model - Sales feature only
- LSTM/CNN model - Sales feature only

Packages:
1. pandas
2. numpy
3. scikit-learn
4. tensorflow
5. matplotlib

In [None]:
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from sklearn.metrics import mean_absolute_percentage_error as MAPE_metrics
from sklearn.preprocessing import StandardScaler

# Forecast horizon: the next 14 days.
# Look-back window fed to the models: 56 days (8 weeks, i.e. four horizons).
TEST_SIZE = 14
TRAIN_SIZE = TEST_SIZE * 4

# NOTE(review): get_active_session is not imported in the visible cells; in a
# Snowflake notebook it presumably comes from snowflake.snowpark.context -
# confirm it is in scope before this cell runs.
session = get_active_session()
session.use_database("ml")
session.use_schema("retail_store")

# Load the preprocessed transactions into pandas, drop the STORE_CHAIN_ID
# column, parse DATE, and index the rows chronologically by DATE.
transactions_df = (
    session.table("store_2_preprocessed_transactions")
    .to_pandas()
    .drop(columns="STORE_CHAIN_ID")
    .assign(DATE=lambda frame: pd.to_datetime(frame["DATE"]))
    .sort_values(by="DATE")
    .set_index("DATE")
)

- Helper functions used throughout the rest of the notebook

In [None]:
def plot_graph(train_values, actual_values, predictions):
    """
    Plot training data, actual test values and predictions on one time axis.

    Three lines are drawn on the current matplotlib axes:
    1. Training data values, at time steps 0 .. len(train_values) - 1
    2. Actual test values, at the time steps that follow the training data
    3. Predicted values, at the same time steps as the actual test values

    The x-axis is an integer time-step index (one step per observation) and
    the y-axis is the plotted value.

    Args:
        train_values: Sequence of training data values to plot
        actual_values: Sequence of actual test values to plot
        predictions: Sequence of predicted values to plot; must have the same
            length as actual_values because both share the same x positions

    Returns:
        None
    """
    n_train = len(train_values)
    n_test = len(actual_values)

    # Use consecutive integer steps. np.linspace(0, n, n) would space the
    # points n / (n - 1) apart and would place the last training point and the
    # first test point on the same x position.
    x_train = np.arange(n_train)
    x_test = np.arange(n_train, n_train + n_test)

    plt.plot(x_train, train_values)
    plt.plot(x_test, actual_values)
    plt.plot(x_test, predictions)
    plt.legend(["Train Data", "Actual Sales", "Predictions"])


def calculate_smape(actual_values, predictions):
    """
    Calculate Symmetric Mean Absolute Percentage Error (SMAPE) between actual and predicted values.

    Each term is 2 * |prediction - actual| / (|actual| + |prediction|). A term
    whose actual value and prediction are both zero is a perfect forecast and
    contributes 0 instead of producing a 0/0 NaN.

    Args:
        actual_values: Array-like of actual values (list, ndarray, Series, ...)
        predictions: Array-like of predicted values with the same number of
            elements as actual_values

    Returns:
        float: SMAPE score as a percentage between 0 and 200

    Raises:
        ValueError: If the inputs are empty or differ in number of elements
    """
    # Flatten both inputs so that e.g. an (n, 1) array and an (n,) array are
    # compared element by element instead of being broadcast to (n, n).
    actual = np.asarray(actual_values, dtype=float).ravel()
    predicted = np.asarray(predictions, dtype=float).ravel()

    if actual.size == 0:
        raise ValueError("calculate_smape requires at least one value")
    if actual.size != predicted.size:
        raise ValueError(
            "actual_values and predictions must have the same number of elements"
        )

    doubled_error = 2 * np.abs(predicted - actual)
    scale = np.abs(actual) + np.abs(predicted)

    # Where scale is 0 both values are 0, so the error is 0 as well.
    ratios = np.divide(
        doubled_error, scale, out=np.zeros_like(scale), where=scale != 0
    )
    return float(100 / actual.size * np.sum(ratios))


def create_sequences(data, past_days=TRAIN_SIZE, future_days=TEST_SIZE):
    """
    Create sliding-window (input, target) pairs for time series prediction.

    For every position i with a full history and a full horizon available,
    the input is rows [i - past_days, i) of data (all columns) and the target
    is column 0 of rows [i, i + future_days).

    Args:
        data: Array-like time series of shape (n_steps, n_features), e.g. a
            2-D ndarray or a DataFrame; a 1-D series is treated as a single
            feature. Column 0 must hold the value to predict.
        past_days: Number of past days to use as input features (default: TRAIN_SIZE)
        future_days: Number of future days to predict (default: TEST_SIZE)

    Returns:
        tuple: (X, y) where:
            X: Model inputs, shape (n_windows, past_days, n_features)
            y: Prediction targets, shape (n_windows, future_days)
        When data is too short for a single window, both arrays have zero
        rows but keep the trailing dimensions above.

    Raises:
        ValueError: If past_days or future_days is smaller than 1
    """
    if past_days < 1 or future_days < 1:
        raise ValueError("past_days and future_days must both be at least 1")

    values = np.asarray(data)
    if values.ndim == 1:
        # Single feature given as a flat series: make it one column.
        values = values.reshape(-1, 1)

    split_points = range(past_days, len(values) - future_days + 1)
    if len(split_points) == 0:
        # Keep the documented ranks so callers can still read X.shape[1:].
        empty_X = np.empty((0, past_days) + values.shape[1:], dtype=values.dtype)
        empty_y = np.empty((0, future_days) + values.shape[2:], dtype=values.dtype)
        return empty_X, empty_y

    X = np.array([values[i - past_days:i] for i in split_points])
    y = np.array([values[i:i + future_days, 0] for i in split_points])

    return X, y

## LSTM & LSTM/CNN - Sales & Flag Weekend
### 1. Prepare sequences for LSTM (past 2 months to predict future fortnight)

Optimal results (LSTM):
- MAPE value: 0.0687
- SMAPE value: 6.770
- Tuning time: 48.833s
- Fitting time: 0.52s

Optimal results (LSTM/CNN):
- MAPE value: 0.0946
- SMAPE value: 9.048
- Tuning time: 40.766s
- Fitting time: 0.62s

In [None]:
# Extracting daily sales and flag weekend features
features = ["TOTAL_SALES", "FLAG_WEEKEND"]
data = transactions_df[features].values

# Scaling the sales data only. The scaler is fitted on the rows that precede
# the final TEST_SIZE days, because those days are the hold-out the forecasts
# are scored against; fitting on them would leak hold-out statistics.
scaler = StandardScaler()
scaler.fit(data[:-TEST_SIZE, [0]])
scaled_sales_data = scaler.transform(data[:, [0]])

# Combine scaled sales data with binary flag weekend feature
flag_weekend = data[:, [1]]
data = np.hstack([scaled_sales_data, flag_weekend])

# Create input sequences for the models from the pre-hold-out rows only, so
# no training target overlaps the days used for evaluation. `data` itself
# still covers the full history for later cells.
X, y = create_sequences(data[:-TEST_SIZE])

### 2. Create LSTM model & LSTM/CNN combination model

In [None]:
# Every sample is a (time steps, features) window taken from X.
input_shape = (X.shape[1], X.shape[2])

# LSTM model: two stacked LSTM layers with dropout, followed by a dense layer
# that emits one value per forecast day.
lstm_model = Sequential([
    LSTM(256, return_sequences=True, input_shape=input_shape),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(TEST_SIZE),
])
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.summary()


# LSTM/CNN combination model: a Conv1D + max-pooling front end feeding the
# same LSTM stack. Only the first layer declares input_shape; the LSTM that
# follows receives the pooled convolution output, so repeating the raw input
# shape on it (as the earlier version did) was incorrect and has been removed.
lstm_cnn_model = Sequential([
    Conv1D(filters=128, kernel_size=5, input_shape=input_shape),
    MaxPooling1D(pool_size=2),
    LSTM(256, return_sequences=True),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(TEST_SIZE),
])
lstm_cnn_model.compile(optimizer='adam', loss='mse')
lstm_cnn_model.summary()

### 3. Train the LSTM & LSTM/CNN model

In [None]:
# Fit LSTM model.
# time.perf_counter() is used for the timestamps because only their
# difference is reported: it is monotonic and high resolution, whereas
# time.time() can jump if the system clock is adjusted mid-run.
lstm_start_time = time.perf_counter()
lstm_model.fit(X, y, epochs=10, validation_split=0.2)
lstm_end_time = time.perf_counter()


# Fit LSTM/CNN combination model (same data, epochs and validation split)
lstm_cnn_start_time = time.perf_counter()
lstm_cnn_model.fit(X, y, epochs=10, validation_split=0.2)
lstm_cnn_end_time = time.perf_counter()

In [None]:
# Seconds elapsed between the timestamps recorded around lstm_model.fit
training_time = lstm_end_time - lstm_start_time
print(f"LSTM model training time: {training_time :.3f}")

# Same for lstm_cnn_model.fit; training_time is reused, so after this cell it
# holds the LSTM/CNN duration
training_time = lstm_cnn_end_time - lstm_cnn_start_time
print(f"LSTM/CNN model training time: {training_time :.3f}")

### 4. Make and plot predictions

In [None]:
# Model input: the TRAIN_SIZE days immediately before the final TEST_SIZE
# days. The forecasts are plotted and scored against those final TEST_SIZE
# days, so they must not be part of the input window.
last_sequence = np.expand_dims(data[-(TRAIN_SIZE + TEST_SIZE):-TEST_SIZE], axis=0)

# LSTM prediction (the reported "fitting time" is the duration of predict())
start_time = time.perf_counter()
lstm_predicted_values = lstm_model.predict(last_sequence)
end_time = time.perf_counter()

fitting_time = end_time - start_time
print(f"LSTM model fitting time: {fitting_time :.2f}")

# Inverse transform predictions to original scale; the scaler expects a
# single-column 2-D array, hence the reshape before and flatten after
lstm_predicted_values = lstm_predicted_values.reshape(-1, 1)
lstm_predicted_values = scaler.inverse_transform(lstm_predicted_values).flatten()

print(f"LSTM sales prediction: {lstm_predicted_values}")


# LSTM/CNN prediction
start_time = time.perf_counter()
lstm_cnn_predicted_values = lstm_cnn_model.predict(last_sequence)
end_time = time.perf_counter()

fitting_time = end_time - start_time
# Label fixed: this timing belongs to the LSTM/CNN model, not the LSTM model
print(f"LSTM/CNN model fitting time: {fitting_time :.2f}")

# Inverse transform predictions to original scale
lstm_cnn_predicted_values = lstm_cnn_predicted_values.reshape(-1, 1)
lstm_cnn_predicted_values = scaler.inverse_transform(lstm_cnn_predicted_values).flatten()

print(f"LSTM/CNN sales prediction: {lstm_cnn_predicted_values}")


In [None]:
# LSTM forecast drawn next to the final TEST_SIZE days of actual sales and
# the TRAIN_SIZE days that precede them
actual_values = transactions_df["TOTAL_SALES"].tail(TEST_SIZE)
train_values = transactions_df["TOTAL_SALES"].iloc[-(TRAIN_SIZE + TEST_SIZE):-TEST_SIZE]
plot_graph(
    train_values=train_values,
    actual_values=actual_values,
    predictions=lstm_predicted_values,
)

In [None]:
# LSTM/CNN forecast drawn next to the final TEST_SIZE days of actual sales
# and the TRAIN_SIZE days that precede them
actual_values = transactions_df["TOTAL_SALES"].tail(TEST_SIZE)
train_values = transactions_df["TOTAL_SALES"].iloc[-(TRAIN_SIZE + TEST_SIZE):-TEST_SIZE]
plot_graph(
    train_values=train_values,
    actual_values=actual_values,
    predictions=lstm_cnn_predicted_values,
)

### 5. Calculate MAPE and SMAPE metrics

In [None]:
# LSTM: compare the forecast with the actual sales selected in the plotting
# cell. MAPE comes from scikit-learn; SMAPE from calculate_smape.
mape = MAPE_metrics(y_true=actual_values, y_pred=lstm_predicted_values)
smape = calculate_smape(
    actual_values=actual_values,
    predictions=lstm_predicted_values,
)

print(f"LSTM MAPE value for last 14 days prediction: {mape}")
print(f"LSTM SMAPE value for last 14 days prediction: {smape}")


# LSTM/CNN: same two metrics; mape and smape are overwritten
mape = MAPE_metrics(y_true=actual_values, y_pred=lstm_cnn_predicted_values)
smape = calculate_smape(
    actual_values=actual_values,
    predictions=lstm_cnn_predicted_values,
)

print(f"LSTM/CNN MAPE value for last 14 days prediction: {mape}")
print(f"LSTM/CNN SMAPE value for last 14 days prediction: {smape}")

## LSTM & LSTM/CNN - Sales
### 1. Prepare sequences for LSTM (past 2 months to predict future fortnight)

Optimal results (LSTM):
- MAPE value: 0.058
- SMAPE value: 5.637
- Tuning time: 52.58s
- Fitting time: 0.24s

Optimal results (LSTM/CNN):
- MAPE value: 0.053
- SMAPE value: 5.193
- Tuning time: 35.705s
- Fitting time: 0.32s

In [None]:
# Selecting only the daily sales feature (kept 2-D: one column)
data = transactions_df[['TOTAL_SALES']].values

# Scaling the data. The scaler is fitted on the rows that precede the final
# TEST_SIZE days, because those days are the hold-out the forecasts are
# scored against; fitting on them would leak hold-out statistics.
scaler = StandardScaler()
scaler.fit(data[:-TEST_SIZE])
data = scaler.transform(data)

# Build the training windows from the pre-hold-out rows only; `data` itself
# still covers the full history for later cells.
X, y = create_sequences(data[:-TEST_SIZE])

### 2. Train the LSTM & LSTM/CNN model

In [None]:
# The models built for the two-feature experiment expect inputs with two
# features and already hold trained weights. X now has a single feature, so
# both networks are rebuilt from scratch for the new input shape before fitting.
def _build_lstm_model(input_shape):
    """Return a compiled stacked-LSTM model emitting TEST_SIZE values."""
    model = Sequential([
        LSTM(256, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(128),
        Dropout(0.2),
        Dense(TEST_SIZE),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model


def _build_lstm_cnn_model(input_shape):
    """Return a compiled Conv1D + LSTM model emitting TEST_SIZE values."""
    model = Sequential([
        Conv1D(filters=128, kernel_size=5, input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(256, return_sequences=True),
        Dropout(0.2),
        LSTM(128),
        Dropout(0.2),
        Dense(TEST_SIZE),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model


lstm_model = _build_lstm_model(X.shape[1:])
lstm_cnn_model = _build_lstm_cnn_model(X.shape[1:])

# Fit LSTM model (perf_counter: monotonic clock suited to measuring durations)
lstm_start_time = time.perf_counter()
lstm_model.fit(X, y, epochs=10, validation_split=0.2)
lstm_end_time = time.perf_counter()


# Fit LSTM/CNN combination model
lstm_cnn_start_time = time.perf_counter()
lstm_cnn_model.fit(X, y, epochs=10, validation_split=0.2)
lstm_cnn_end_time = time.perf_counter()

In [None]:
# Seconds elapsed between the timestamps recorded around lstm_model.fit
training_time = lstm_end_time - lstm_start_time
print(f"LSTM model training time: {training_time :.3f}")

# Same for lstm_cnn_model.fit; training_time is reused, so after this cell it
# holds the LSTM/CNN duration
training_time = lstm_cnn_end_time - lstm_cnn_start_time
print(f"LSTM/CNN model training time: {training_time :.3f}")

### 3. Make and plot predictions


In [None]:
# Model input: the TRAIN_SIZE days immediately before the final TEST_SIZE
# days. The forecasts are plotted and scored against those final TEST_SIZE
# days, so they must not be part of the input window.
last_sequence = np.expand_dims(data[-(TRAIN_SIZE + TEST_SIZE):-TEST_SIZE], axis=0)

# LSTM prediction (the reported "fitting time" is the duration of predict())
start_time = time.perf_counter()
lstm_predicted_values = lstm_model.predict(last_sequence)
end_time = time.perf_counter()

fitting_time = end_time - start_time
print(f"LSTM model fitting time: {fitting_time :.2f}")

# Inverse transform predictions to original scale; the scaler expects a
# single-column 2-D array, hence the reshape before and flatten after
lstm_predicted_values = lstm_predicted_values.reshape(-1, 1)
lstm_predicted_values = scaler.inverse_transform(lstm_predicted_values).flatten()

print(f"LSTM sales prediction: {lstm_predicted_values}")


# LSTM/CNN prediction
start_time = time.perf_counter()
lstm_cnn_predicted_values = lstm_cnn_model.predict(last_sequence)
end_time = time.perf_counter()

fitting_time = end_time - start_time
# Label fixed: this timing belongs to the LSTM/CNN model, not the LSTM model
print(f"LSTM/CNN model fitting time: {fitting_time :.2f}")

# Inverse transform predictions to original scale
lstm_cnn_predicted_values = lstm_cnn_predicted_values.reshape(-1, 1)
lstm_cnn_predicted_values = scaler.inverse_transform(lstm_cnn_predicted_values).flatten()

print(f"LSTM/CNN sales prediction: {lstm_cnn_predicted_values}")

In [None]:
# LSTM forecast drawn next to the final TEST_SIZE days of actual sales and
# the TRAIN_SIZE days that precede them
actual_values = transactions_df["TOTAL_SALES"].tail(TEST_SIZE)
train_values = transactions_df["TOTAL_SALES"].iloc[-(TRAIN_SIZE + TEST_SIZE):-TEST_SIZE]
plot_graph(
    train_values=train_values,
    actual_values=actual_values,
    predictions=lstm_predicted_values,
)

In [None]:
# LSTM/CNN forecast drawn next to the final TEST_SIZE days of actual sales
# and the TRAIN_SIZE days that precede them
actual_values = transactions_df["TOTAL_SALES"].tail(TEST_SIZE)
train_values = transactions_df["TOTAL_SALES"].iloc[-(TRAIN_SIZE + TEST_SIZE):-TEST_SIZE]
plot_graph(
    train_values=train_values,
    actual_values=actual_values,
    predictions=lstm_cnn_predicted_values,
)

### 4. Calculate MAPE and SMAPE metrics

In [None]:
# LSTM: compare the forecast with the actual sales selected in the plotting
# cell. MAPE comes from scikit-learn; SMAPE from calculate_smape.
mape = MAPE_metrics(y_true=actual_values, y_pred=lstm_predicted_values)
smape = calculate_smape(
    actual_values=actual_values,
    predictions=lstm_predicted_values,
)

print(f"LSTM MAPE value for last 14 days prediction: {mape}")
print(f"LSTM SMAPE value for last 14 days prediction: {smape}")


# LSTM/CNN: same two metrics; mape and smape are overwritten
mape = MAPE_metrics(y_true=actual_values, y_pred=lstm_cnn_predicted_values)
smape = calculate_smape(
    actual_values=actual_values,
    predictions=lstm_cnn_predicted_values,
)

print(f"LSTM/CNN MAPE value for last 14 days prediction: {mape}")
print(f"LSTM/CNN SMAPE value for last 14 days prediction: {smape}")