In [None]:
from pathlib import Path
import os

os.environ["KERAS_BACKEND"] = "torch" # "jax"
# os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"]="false"
# os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"]=".50"
# os.environ["XLA_PYTHON_CLIENT_ALLOCATOR"]="platform"

import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import (
    f1_score,
    precision_recall_fscore_support,
    roc_auc_score,
    roc_curve,
    auc,
    precision_recall_curve,
    confusion_matrix
)
from keras.models import Sequential, Model
from keras.layers import Input, LSTM, Dense, BatchNormalization
from keras.optimizers import Adam
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

from var import DATA_OUT, IMAGE_OUT, FORECAST_HOURS_IN_ADVANCE

In [None]:
# Load the pickled dataset stored under DATA_OUT.
# NOTE: unpickling executes arbitrary code; only load files from a trusted source.
dataset_path = Path(DATA_OUT) / 'df_dataset.pickle'
df = pd.read_pickle(dataset_path)

In [None]:
# Columns of `df` used as model inputs.
FEATURE_COLUMNS = [
    'ie_fix',
    'ie_mav_6h',
    'iu_fix',
    'iu_mav_6h',
    'hf',
    'hf_mav_2h',
    'f_107_adj',
    'hp_30',
    'smr',
    'solar_zenith_angle',
]
# Target column; its name depends on the configured forecast horizon.
target_column = f'tid_within_{FORECAST_HOURS_IN_ADVANCE}h'

# Independent copies, so later edits never write back into `df`.
X = df[FEATURE_COLUMNS].copy()
y = df[target_column].copy()

In [None]:
# Label-based split on the index: 2014-2015 for training, 2016 for testing.
# NOTE(review): presumably the index is a DatetimeIndex (partial-string slicing) - confirm.
train_period = slice('2014', '2015')
test_period = '2016'

# Missing feature values are replaced with 0; the targets are left untouched.
X_train = X.loc[train_period].fillna(0).copy()
X_test = X.loc[test_period].fillna(0).copy()

y_train = y.loc[train_period].copy()
y_test = y.loc[test_period].copy()

In [None]:
n_days = 20
# Window length in samples (2 * 24 samples per day).
# NOTE(review): assumes half-hourly data - confirm against the dataset.
T = 2 * 24 * n_days

# One window per position at which a full T-step history is available.
n_train_windows = y_train.shape[0] - (T - 1)

# Window `start` covers rows [start, start + T); its label is the target
# of the last row in the window.
X_train_lstm = np.array(
    [X_train.iloc[start: start + T].values for start in range(n_train_windows)]
)
y_train_lstm = np.array(
    [y_train.iloc[start + (T - 1)] for start in range(n_train_windows)]
).reshape(-1, 1)

In [None]:
# Sanity check of the windowed training arrays: features are stacked T-row
# windows (windows, T, features); targets were reshaped to a single column.
print(f'Train data dimensions: {X_train_lstm.shape}, {y_train_lstm.shape}')

In [None]:
# Every test target is predicted from a T-step input window, so the first test
# window needs the last (T-1) training records placed in front of the test features.
n_prepend = T - 1

# Use a non-negative start index: `iloc[-0:]` would return the whole training
# frame when T == 1, and max(..., 0) keeps "take everything" when T-1 exceeds
# the number of training rows.
prepend_features = X_train.iloc[max(len(X_train) - n_prepend, 0):]

# Keep only the trailing rows aligned with y_test before prepending, so that
# re-running this cell does not stack another copy of the training tail onto
# X_test (which would silently misalign windows and labels).
test_features = X_test.iloc[len(X_test) - len(y_test):]
X_test = pd.concat([prepend_features, test_features], axis=0)

In [None]:
# One window per test target.
n_test_windows = y_test.shape[0]

# X_test carries (T-1) prepended training rows, so window `start`
# (rows [start, start + T)) ends on the row that belongs to y_test[start].
X_test_lstm = np.array(
    [X_test.iloc[start: start + T].values for start in range(n_test_windows)]
)
y_test_lstm = np.array(
    [y_test.iloc[start] for start in range(n_test_windows)]
).reshape(-1, 1)

In [None]:
# Sanity check of the windowed test arrays: features are stacked T-row
# windows (windows, T, features); targets were reshaped to a single column.
print(f'Test data dimensions: {X_test_lstm.shape}, {y_test_lstm.shape}')

In [None]:
# --- Architecture ---
LAYERS = [64, 32, 1]          # units in the two hidden layers and the output layer

# --- Dataset dimensions ---
M_TRAIN = len(X_train_lstm)   # number of training examples (windows)
M_TEST = len(X_test_lstm)     # number of test examples (windows); lower it to validate on a subset
N = X_train_lstm.shape[2]     # number of features per time step

# --- Training hyperparameters ---
BATCH = 320                   # batch size
EPOCH = 10                    # number of epochs
LR = 0.05                     # learning rate passed to the optimizer
LAMBD = 0.03                  # lambda for L2 regularization
DP = 0.0                      # dropout rate
RDP = 0.0                     # recurrent dropout rate

In [None]:
# Build the model: two stacked LSTM layers with batch normalization in
# between, followed by a single sigmoid unit for the binary target.
model = Sequential()

# Declare the input explicitly. Keras 3 deprecates passing `input_shape` to a
# layer and recommends an `Input` object as the first entry of a Sequential model.
model.add(Input(shape=(T, N)))

model.add(
    LSTM(
        units=LAYERS[0],
        activation='tanh',
        recurrent_activation='hard_sigmoid',
        # L2 regularization is currently disabled (`l2` would have to be
        # imported from keras.regularizers to enable it):
        # kernel_regularizer=l2(LAMBD),
        # recurrent_regularizer=l2(LAMBD),
        dropout=DP,
        recurrent_dropout=RDP,
        return_sequences=True,   # the next LSTM layer consumes the full sequence
        return_state=False,
        stateful=False,
        unroll=False
        )
)

model.add(BatchNormalization())

model.add(
    LSTM(
        units=LAYERS[1],
        activation='tanh',
        recurrent_activation='hard_sigmoid',
        # kernel_regularizer=l2(LAMBD), recurrent_regularizer=l2(LAMBD),
        dropout=DP,
        recurrent_dropout=RDP,
        return_sequences=False,  # only the last step feeds the output layer
        return_state=False,
        stateful=False,
        unroll=False
        )
)

# Sigmoid output: one probability per input window.
model.add(
    Dense(units=LAYERS[2], activation='sigmoid')
)

In [None]:
# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked as the monitoring metric.
optimizer = Adam(learning_rate=LR)

model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# `summary()` prints the layer table itself and returns None, so wrapping it
# in print() only adds a stray "None" line to the output.
model.summary()

In [None]:
# Train the model and keep the returned History object, so the per-epoch
# loss/metric values remain available for inspection and plotting (and the
# object's repr is not echoed as cell output).
# NOTE(review): the validation data is the (optionally truncated) test set, so
# any tuning driven by the validation metrics leaks test information.
history = model.fit(
    X_train_lstm,
    y_train_lstm,
    epochs=EPOCH,
    batch_size=BATCH,
    validation_split=0.0,
    validation_data=(X_test_lstm[:M_TEST], y_test_lstm[:M_TEST]),
    shuffle=True,
    verbose='auto',
    #callbacks=[lr_decay, early_stop],
)