In [None]:
from pathlib import Path
import os

# Must be set before the first keras import so the backend choice takes effect.
os.environ["KERAS_BACKEND"] = "torch"

import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import (
    f1_score,
    precision_recall_fscore_support,
    roc_auc_score,
    roc_curve,
    auc,
    precision_recall_curve,
    confusion_matrix
)
from sklearn.utils import class_weight
from keras.models import Sequential, Model
from keras.layers import Input, LSTM, Dense, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.metrics import F1Score
from keras.regularizers import l2
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

from var import DATA_OUT, IMAGE_OUT, FORECAST_HOURS_IN_ADVANCE

In [None]:
# Load the prepared dataset from the project's output directory.
# NOTE(review): unpickling can execute arbitrary code — only load files
# produced by this project.
dataset_path = Path(DATA_OUT) / 'df_dataset.pickle'
df = pd.read_pickle(dataset_path)

In [None]:
# Model inputs: the nine feature columns, copied so later steps never write
# back into df.
X = df.loc[
    :,
    [
        'ie_fix',
        'ie_mav_6h',
        'iu_fix',
        'iu_mav_6h',
        'hf',
        'f_107_adj',
        'hp_30',
        'smr',
        'solar_zenith_angle',
    ],
].copy()

# Target: the column named after the configured forecast horizon.
y = df.loc[:, f'tid_within_{FORECAST_HOURS_IN_ADVANCE}h'].copy()

In [None]:
# Chronological hold-out: 2018-2020 is used for fitting, 2021-2022 for testing.
# Label-based slicing with year strings — presumably X and y share a sorted
# DatetimeIndex (TODO confirm).
train_period = slice('2018', '2020')
test_period = slice('2021', '2022')

X_train, y_train = X.loc[train_period].copy(), y.loc[train_period].copy()
X_test, y_test = X.loc[test_period].copy(), y.loc[test_period].copy()

## Feature scaling

In [None]:
# Columns that are median-imputed and then standardised.
standard_features = ['ie_fix', 'ie_mav_6h', 'iu_fix', 'iu_mav_6h', 'f_107_adj', 'smr']

# Columns that are median-imputed and then rescaled to the [0, 1] range.
minmax_features = ['hf', 'hp_30', 'solar_zenith_angle']

standard_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

minmax_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', MinMaxScaler(feature_range=(0,1))),
])

# Route each column group through its own transformer. The output columns come
# out in transformer order: standard_features first, then minmax_features.
preprocessor = ColumnTransformer([
    ('standard', standard_transformer, standard_features),
    ('minmax', minmax_transformer, minmax_features),
])

preprocessing = Pipeline([('preprocessor', preprocessor)])

# Imputation medians and scaling statistics are learned from the training
# period only.
preprocessing.fit(X_train)

In [None]:
# Apply the fitted preprocessing to both splits (statistics come from X_train).
X_train_pp = preprocessing.transform(X_train)
X_test_pp = preprocessing.transform(X_test)

## Data transformation for LSTM network

In [None]:
# Length of the look-back window fed to the LSTM, in rows.
# 2 * 24 rows per day — presumably half-hourly sampling (TODO confirm).
n_days = 20
T = n_days * 24 * 2

In [None]:
# Build overlapping training windows: sample k holds rows k .. k+T-1 of the
# preprocessed features and is labelled with the target at the window's last row.
n_train_windows = y_train.shape[0] - (T-1)

X_train_lstm = np.array([X_train_pp[k: k+T] for k in range(n_train_windows)])
y_train_lstm = np.array(
    [y_train.iloc[k + (T-1)] for k in range(n_train_windows)]
).reshape(-1,1)

In [None]:
# Sanity check: shapes of the windowed training features and labels.
print(
    f'Train data dimensions: {X_train_lstm.shape}, {y_train_lstm.shape}'
)

In [None]:
# Every test label needs the T most recent rows as input, so the first test
# windows must reach back into the end of the training period: prepend the
# last (T-1) training rows to the test features.
#
# The rows are taken from the *preprocessed* arrays (imputed and scaled with
# the statistics fitted on the training data), in the column order produced by
# the ColumnTransformer (standard_features, then minmax_features). This keeps
# the test windows in the same feature space as the training windows.
# Prepending the raw frames instead would feed unscaled, un-imputed values to
# the network.
#
# X_test is rebuilt from X_train_pp / X_test_pp only, never from itself, so
# re-running this cell does not keep growing it.
assert X_test_pp.shape[0] == y_test.shape[0], 'X_test_pp must hold exactly one row per test label'

X_test = pd.DataFrame(
    np.concatenate([X_train_pp[-(T-1):], X_test_pp], axis=0),
    index=X_train.index[-(T-1):].append(y_test.index),
    columns=standard_features + minmax_features,
)

In [None]:
# Slice one T-row window per test label. X_test carries (T-1) extra leading
# rows, so window k (rows k .. k+T-1) ends on the row that belongs to label k.
n_test_windows = y_test.shape[0]

X_test_lstm = np.array([X_test[k: k+T] for k in range(n_test_windows)])
y_test_lstm = np.array(
    [y_test.iloc[k] for k in range(n_test_windows)]
).reshape(-1,1)

In [None]:
# Sanity check: shapes of the windowed test features and labels.
print(
    f'Test data dimensions: {X_test_lstm.shape}, {y_test_lstm.shape}'
)

## LSTM model

In [None]:
# --- Architecture ---
LAYERS = [32, 16, 1]             # units of the two LSTM layers and the Dense output layer

# --- Data dimensions, read from the windowed arrays ---
M_TRAIN = len(X_train_lstm)      # number of training windows
M_TEST = len(X_test_lstm)        # number of test windows
N = X_train_lstm.shape[2]        # number of features per time step

# --- Optimisation ---
BATCH = 320                      # batch size
EPOCH = 10                       # number of epochs
LR = 5e-2                        # initial learning rate passed to the optimizer

# --- Regularization ---
LAMBD = 3e-2                     # lambda of the L2 regularization
DP = 0.1                         # dropout rate
RDP = 0.0                        # recurrent dropout rate

In [None]:
def _regularized_lstm(units, return_sequences):
    """Create an LSTM layer with the shared activation, L2 and dropout settings.

    Args:
        units: Number of LSTM units.
        return_sequences: True to emit the full output sequence (needed when
            another recurrent layer follows), False to emit only the last step.

    Returns:
        A new, unbuilt ``LSTM`` layer with its own regularizer instances.
    """
    return LSTM(
        units=units,
        activation='tanh',
        recurrent_activation='hard_sigmoid',
        kernel_regularizer=l2(LAMBD),
        recurrent_regularizer=l2(LAMBD),
        dropout=DP,
        recurrent_dropout=RDP,
        return_sequences=return_sequences,
        return_state=False,
        stateful=False,
    )


# Stack: input (T, N) -> LSTM (sequence out) -> batch norm -> LSTM (last step)
# -> single sigmoid unit.
model = Sequential(
    [
        Input(shape=(T, N)),
        _regularized_lstm(LAYERS[0], return_sequences=True),
        BatchNormalization(),
        _regularized_lstm(LAYERS[1], return_sequences=False),
        Dense(units=LAYERS[2], activation='sigmoid'),
    ]
)

In [None]:
# The network ends in a single sigmoid unit, so the F1 metric needs an explicit
# decision threshold. The bare 'f1_score' string builds F1Score with
# threshold=None, which marks the arg-max of each prediction row as positive.
# With one output column that is every sample, so the score never reflects the
# model. threshold=0.5 turns the probabilities into real 0/1 decisions, and
# average='micro' reports a single scalar. The explicit name keeps the log key
# 'f1_score' that the callbacks monitor.
model.compile(
    loss='binary_crossentropy',
    metrics=[F1Score(average='micro', threshold=0.5, name='f1_score')],
    optimizer=Adam(learning_rate=LR),
)

In [None]:
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

In [None]:
# Both callbacks watch the same quantity: the 'f1_score' entry of the epoch
# logs, where larger is better.
# NOTE(review): without a 'val_' prefix this is the metric computed on the
# training data, not on validation_data — confirm that is intended.
monitoring = dict(monitor='f1_score', mode='max', verbose=1)

# Learning rate decay: multiply the learning rate by 0.2 after one epoch
# without improvement, never going below 1e-5.
lr_decay = ReduceLROnPlateau(
    patience=1,
    factor=0.2,
    min_lr=1e-5,
    **monitoring,
)

# Early stopping: give up after three epochs without improvement and restore
# the weights of the best epoch.
early_stop = EarlyStopping(
    min_delta=0,
    patience=3,
    baseline=0,
    restore_best_weights=True,
    **monitoring,
)

In [None]:
# 'balanced' class weights derived from the training labels, one weight per
# distinct label value (in the sorted order returned by np.unique).
observed_classes = np.unique(y_train)
weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=observed_classes,
    y=y_train,
)

In [None]:
# Training options.
# - The test windows are passed as validation_data, so their loss and metrics
#   are reported after every epoch.
# - shuffle=False feeds the windows in their original (chronological) order.
# - The class weights computed earlier are not passed to fit(); the
#   validation_split alternative is likewise left disabled.
fit_options = dict(
    epochs=EPOCH,
    batch_size=BATCH,
    validation_data=(X_test_lstm, y_test_lstm),
    shuffle=False,
    verbose='auto',
    callbacks=[lr_decay, early_stop],
)

train_history = model.fit(X_train_lstm, y_train_lstm, **fit_options)